[www-releases] r349965 - Add 7.0.1 docs for clang, llvm, and lld
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 21 13:53:04 PST 2018
Added: www-releases/trunk/7.0.1/docs/AMDGPUAsmGFX9.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/AMDGPUAsmGFX9.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/AMDGPUAsmGFX9.html (added)
+++ www-releases/trunk/7.0.1/docs/AMDGPUAsmGFX9.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,1992 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Syntax of GFX9 Instructions — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="up" title="User Guide for AMDGPU Backend" href="AMDGPUUsage.html" />
+ <link rel="next" title="Syntax of AMDGPU Assembler Operands and Modifiers" href="AMDGPUOperandSyntax.html" />
+ <link rel="prev" title="Syntax of GFX8 Instructions" href="AMDGPUAsmGFX8.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="AMDGPUOperandSyntax.html" title="Syntax of AMDGPU Assembler Operands and Modifiers"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX8.html" title="Syntax of GFX8 Instructions"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a> »</li>
+
+ <li><a href="AMDGPUUsage.html" accesskey="U">User Guide for AMDGPU Backend</a> »</li>
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="syntax-of-gfx9-instructions">
+<h1>Syntax of GFX9 Instructions<a class="headerlink" href="#syntax-of-gfx9-instructions" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#ds" id="id1">DS</a></li>
+<li><a class="reference internal" href="#exp" id="id2">EXP</a></li>
+<li><a class="reference internal" href="#flat" id="id3">FLAT</a></li>
+<li><a class="reference internal" href="#mimg" id="id4">MIMG</a></li>
+<li><a class="reference internal" href="#mubuf" id="id5">MUBUF</a></li>
+<li><a class="reference internal" href="#smem" id="id6">SMEM</a></li>
+<li><a class="reference internal" href="#sop1" id="id7">SOP1</a></li>
+<li><a class="reference internal" href="#sop2" id="id8">SOP2</a></li>
+<li><a class="reference internal" href="#sopc" id="id9">SOPC</a></li>
+<li><a class="reference internal" href="#sopk" id="id10">SOPK</a></li>
+<li><a class="reference internal" href="#sopp" id="id11">SOPP</a></li>
+<li><a class="reference internal" href="#vintrp" id="id12">VINTRP</a></li>
+<li><a class="reference internal" href="#vop1" id="id13">VOP1</a></li>
+<li><a class="reference internal" href="#vop2" id="id14">VOP2</a></li>
+<li><a class="reference internal" href="#vop3" id="id15">VOP3</a></li>
+<li><a class="reference internal" href="#vop3p" id="id16">VOP3P</a></li>
+<li><a class="reference internal" href="#vopc" id="id17">VOPC</a></li>
+</ul>
+</div>
+<div class="section" id="ds">
+<h2><a class="toc-backref" href="#id1">DS</a><a class="headerlink" href="#ds" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+ds_add_f32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_rtn_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_src2_f32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_add_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_b32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_b64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_rtn_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_rtn_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_src2_b32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_and_src2_b64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_append dst <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_bpermute_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a>
+ds_cmpst_b32 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_b64 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_f32 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_f64 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_rtn_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_rtn_b64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_rtn_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_cmpst_rtn_f64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_condxchg32_rtn_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_consume dst <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_dec_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_barrier src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_init src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_sema_br src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_sema_p <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_sema_release_all <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_gws_sema_v <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_inc_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_f32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_f64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_i32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_i64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_i64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_f32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_f64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_i32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_i64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_max_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_f32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_f64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_i32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_i64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_i64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_f32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_f64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_i32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_i64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_min_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_mskor_b32 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_mskor_b64 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_mskor_rtn_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_mskor_rtn_b64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_nop
+ds_or_b32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_or_b64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_or_rtn_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_or_rtn_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_or_src2_b32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_or_src2_b64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_ordered_count dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_permute_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a>
+ds_read2_b32 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read2_b64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read2st64_b32 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read2st64_b64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_b128 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_b32 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_b64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_b96 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_i16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_i8 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_i8_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_i8_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u16_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u16_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u8 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u8_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_read_u8_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_rsub_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_rtn_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_rtn_u64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_src2_u32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_src2_u64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_u32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_sub_u64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_swizzle_b32 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-sw-offset16"><em>sw_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrap_rtn_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write2_b32 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write2_b64 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write2st64_b32 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write2st64_b64 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b128 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b16 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b16_d16_hi src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b8 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b8_d16_hi src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_b96 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_src2_b32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_write_src2_b64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg2_rtn_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg2_rtn_b64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg2st64_rtn_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg2st64_rtn_b64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset8"><em>ds_offset8</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg_rtn_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_wrxchg_rtn_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_b32 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_b64 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_rtn_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_rtn_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_src2_b32 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+ds_xor_src2_b64 src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-ds-offset16"><em>ds_offset16</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-gds"><em>gds</em></a>
+</pre>
+</div>
+<div class="section" id="exp">
+<h2><a class="toc-backref" href="#id2">EXP</a><a class="headerlink" href="#exp" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+exp dst, src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-done"><em>done</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-compr"><em>compr</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vm"><em>vm</em></a>
+</pre>
+</div>
+<div class="section" id="flat">
+<h2><a class="toc-backref" href="#id3">FLAT</a><a class="headerlink" href="#flat" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+flat_atomic_add dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_add_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_and dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_and_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_cmpswap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_cmpswap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_dec dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_dec_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_inc dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_inc_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_or dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_or_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_smax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_smax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_smin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_smin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_sub dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_sub_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_swap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_swap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_umax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_umax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_umin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_umin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_xor dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_atomic_xor_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_dword dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_dwordx2 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_dwordx3 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_dwordx4 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_sbyte dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_sbyte_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_sbyte_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_short_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_short_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_sshort dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_ubyte dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_ubyte_d16 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_ubyte_d16_hi dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_load_ushort dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_byte src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_byte_d16_hi src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_dword src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_dwordx2 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_dwordx3 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_dwordx4 src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_short src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+flat_store_short_d16_hi src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset12"><em>flat_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+global_atomic_add dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_add_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_and dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_and_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_cmpswap dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_cmpswap_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_dec dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_dec_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_inc dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_inc_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_or dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_or_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_smax dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_smax_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_smin dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_smin_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_sub dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_sub_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_swap dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_swap_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_umax dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_umax_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_umin dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_umin_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_xor dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_atomic_xor_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_dword dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_dwordx2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_dwordx3 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_dwordx4 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_sbyte dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_sbyte_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_sbyte_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_short_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_short_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_sshort dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_ubyte dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_ubyte_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_ubyte_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_load_ushort dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_byte src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_byte_d16_hi src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_dword src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_dwordx2 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_dwordx3 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_dwordx4 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_short src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+global_store_short_d16_hi src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a>
+scratch_load_dword dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_dwordx2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_dwordx3 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_dwordx4 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_sbyte dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_sbyte_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_sbyte_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_short_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_short_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_sshort dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_ubyte dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_ubyte_d16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_ubyte_d16_hi dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_load_ushort dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_byte src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_byte_d16_hi src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_dword src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_dwordx2 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_dwordx3 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_dwordx4 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_short src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+scratch_store_short_d16_hi src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-flat-offset13"><em>flat_offset13</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+</pre>
+</div>
+<div class="section" id="mimg">
+<h2><a class="toc-backref" href="#id4">MIMG</a><a class="headerlink" href="#mimg" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+image_atomic_add dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_and dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_cmpswap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_dec dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_inc dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_or dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_smax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_smin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_sub dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_swap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_umax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_umin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_atomic_xor dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_gather4 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_b dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_c dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_c_lz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_cl dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_l dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_lz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_lz_o dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_gather4_o dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_get_lod dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_get_resinfo dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_load dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_load_mip dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_load_mip_pck dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_load_mip_pck_sgn dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_load_pck dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_load_pck_sgn dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_sample dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_b dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_c dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_c_lz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_cl dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_l dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_lz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_lz_o dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_sample_o dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-tfe"><em>tfe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_store src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_store_mip src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-d16"><em>d16</em></a>
+image_store_mip_pck src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+image_store_pck src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dmask"><em>dmask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-unorm"><em>unorm</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lwe"><em>lwe</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-da"><em>da</em></a>
+</pre>
+</div>
+<div class="section" id="mubuf">
+<h2><a class="toc-backref" href="#id5">MUBUF</a><a class="headerlink" href="#mubuf" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+buffer_atomic_add dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_add_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_and dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_and_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_cmpswap dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_cmpswap_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_dec dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_dec_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_inc dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_inc_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_or dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_or_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_smax dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_smax_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_smin dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_smin_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_sub dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_sub_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_swap dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_swap_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_umax dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_umax_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_umin dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_umin_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_xor dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_atomic_xor_x2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_dword dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_load_dwordx2 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_dwordx3 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_dwordx4 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_d16_hi_x dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_d16_x dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_d16_xy dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_d16_xyz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_d16_xyzw dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_x dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_load_format_xy dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_xyz dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_format_xyzw dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_sbyte dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_load_sbyte_d16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_sbyte_d16_hi dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_short_d16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_short_d16_hi dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_sshort dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_load_ubyte dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_load_ubyte_d16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_ubyte_d16_hi dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_load_ushort dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_store_byte src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_byte_d16_hi src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_dword src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_dwordx2 src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_dwordx3 src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_dwordx4 src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_d16_hi_x src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_d16_x src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_d16_xy src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_d16_xyz src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_d16_xyzw src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_x src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_xy src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_xyz src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_format_xyzw src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_lds_dword src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-lds"><em>lds</em></a>
+buffer_store_short src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_store_short_d16_hi src0, src1, src2, src3 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-idxen"><em>idxen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-offen"><em>offen</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-buf-offset12"><em>buf_offset12</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-slc"><em>slc</em></a>
+buffer_wbinvl1
+buffer_wbinvl1_vol
+</pre>
+</div>
+<div class="section" id="smem">
+<h2><a class="toc-backref" href="#id6">SMEM</a><a class="headerlink" href="#smem" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+s_atc_probe src0, src1, src2
+s_atc_probe_buffer src0, src1, src2
+s_atomic_add dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_add_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_and dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_and_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_cmpswap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_cmpswap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_dec dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_dec_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_inc dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_inc_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_or dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_or_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_smax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_smax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_smin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_smin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_sub dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_sub_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_swap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_swap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_umax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_umax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_umin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_umin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_xor dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_atomic_xor_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_add dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_add_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_and dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_and_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_cmpswap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_cmpswap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_dec dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_dec_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_inc dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_inc_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_or dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_or_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_smax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_smax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_smin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_smin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_sub dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_sub_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_swap dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_swap_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_umax dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_umax_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_umin dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_umin_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_xor dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_atomic_xor_x2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_load_dword dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_load_dwordx16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_load_dwordx2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_load_dwordx4 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_load_dwordx8 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_store_dword src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_store_dwordx2 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_buffer_store_dwordx4 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_dcache_discard src0, src1
+s_dcache_discard_x2 src0, src1
+s_dcache_inv
+s_dcache_inv_vol
+s_dcache_wb
+s_dcache_wb_vol
+s_load_dword dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_load_dwordx16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_load_dwordx2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_load_dwordx4 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_load_dwordx8 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_memrealtime dst
+s_memtime dst
+s_scratch_load_dword dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_scratch_load_dwordx2 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_scratch_load_dwordx4 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_scratch_store_dword src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_scratch_store_dwordx2 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_scratch_store_dwordx4 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_store_dword src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_store_dwordx2 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+s_store_dwordx4 src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-glc"><em>glc</em></a>
+</pre>
+</div>
+<div class="section" id="sop1">
+<h2><a class="toc-backref" href="#id7">SOP1</a><a class="headerlink" href="#sop1" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>s_abs_i32 dst, src0
+s_and_saveexec_b64 dst, src0
+s_andn1_saveexec_b64 dst, src0
+s_andn1_wrexec_b64 dst, src0
+s_andn2_saveexec_b64 dst, src0
+s_andn2_wrexec_b64 dst, src0
+s_bcnt0_i32_b32 dst, src0
+s_bcnt0_i32_b64 dst, src0
+s_bcnt1_i32_b32 dst, src0
+s_bcnt1_i32_b64 dst, src0
+s_bitreplicate_b64_b32 dst, src0
+s_bitset0_b32 dst, src0
+s_bitset0_b64 dst, src0
+s_bitset1_b32 dst, src0
+s_bitset1_b64 dst, src0
+s_brev_b32 dst, src0
+s_brev_b64 dst, src0
+s_cbranch_join src0
+s_cmov_b32 dst, src0
+s_cmov_b64 dst, src0
+s_ff0_i32_b32 dst, src0
+s_ff0_i32_b64 dst, src0
+s_ff1_i32_b32 dst, src0
+s_ff1_i32_b64 dst, src0
+s_flbit_i32 dst, src0
+s_flbit_i32_b32 dst, src0
+s_flbit_i32_b64 dst, src0
+s_flbit_i32_i64 dst, src0
+s_getpc_b64 dst
+s_mov_b32 dst, src0
+s_mov_b64 dst, src0
+s_mov_fed_b32 dst, src0
+s_movreld_b32 dst, src0
+s_movreld_b64 dst, src0
+s_movrels_b32 dst, src0
+s_movrels_b64 dst, src0
+s_nand_saveexec_b64 dst, src0
+s_nor_saveexec_b64 dst, src0
+s_not_b32 dst, src0
+s_not_b64 dst, src0
+s_or_saveexec_b64 dst, src0
+s_orn1_saveexec_b64 dst, src0
+s_orn2_saveexec_b64 dst, src0
+s_quadmask_b32 dst, src0
+s_quadmask_b64 dst, src0
+s_rfe_b64 src0
+s_set_gpr_idx_idx src0
+s_setpc_b64 src0
+s_sext_i32_i16 dst, src0
+s_sext_i32_i8 dst, src0
+s_swappc_b64 dst, src0
+s_wqm_b32 dst, src0
+s_wqm_b64 dst, src0
+s_xnor_saveexec_b64 dst, src0
+s_xor_saveexec_b64 dst, src0</pre>
+</div>
+</div>
+<div class="section" id="sop2">
+<h2><a class="toc-backref" href="#id8">SOP2</a><a class="headerlink" href="#sop2" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>s_absdiff_i32 dst, src0, src1
+s_add_i32 dst, src0, src1
+s_add_u32 dst, src0, src1
+s_addc_u32 dst, src0, src1
+s_and_b32 dst, src0, src1
+s_and_b64 dst, src0, src1
+s_andn2_b32 dst, src0, src1
+s_andn2_b64 dst, src0, src1
+s_ashr_i32 dst, src0, src1
+s_ashr_i64 dst, src0, src1
+s_bfe_i32 dst, src0, src1
+s_bfe_i64 dst, src0, src1
+s_bfe_u32 dst, src0, src1
+s_bfe_u64 dst, src0, src1
+s_bfm_b32 dst, src0, src1
+s_bfm_b64 dst, src0, src1
+s_cbranch_g_fork src0, src1
+s_cselect_b32 dst, src0, src1
+s_cselect_b64 dst, src0, src1
+s_lshl1_add_u32 dst, src0, src1
+s_lshl2_add_u32 dst, src0, src1
+s_lshl3_add_u32 dst, src0, src1
+s_lshl4_add_u32 dst, src0, src1
+s_lshl_b32 dst, src0, src1
+s_lshl_b64 dst, src0, src1
+s_lshr_b32 dst, src0, src1
+s_lshr_b64 dst, src0, src1
+s_max_i32 dst, src0, src1
+s_max_u32 dst, src0, src1
+s_min_i32 dst, src0, src1
+s_min_u32 dst, src0, src1
+s_mul_hi_i32 dst, src0, src1
+s_mul_hi_u32 dst, src0, src1
+s_mul_i32 dst, src0, src1
+s_nand_b32 dst, src0, src1
+s_nand_b64 dst, src0, src1
+s_nor_b32 dst, src0, src1
+s_nor_b64 dst, src0, src1
+s_or_b32 dst, src0, src1
+s_or_b64 dst, src0, src1
+s_orn2_b32 dst, src0, src1
+s_orn2_b64 dst, src0, src1
+s_pack_hh_b32_b16 dst, src0, src1
+s_pack_lh_b32_b16 dst, src0, src1
+s_pack_ll_b32_b16 dst, src0, src1
+s_rfe_restore_b64 src0, src1
+s_sub_i32 dst, src0, src1
+s_sub_u32 dst, src0, src1
+s_subb_u32 dst, src0, src1
+s_xnor_b32 dst, src0, src1
+s_xnor_b64 dst, src0, src1
+s_xor_b32 dst, src0, src1
+s_xor_b64 dst, src0, src1</pre>
+</div>
+</div>
+<div class="section" id="sopc">
+<h2><a class="toc-backref" href="#id9">SOPC</a><a class="headerlink" href="#sopc" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>s_bitcmp0_b32 src0, src1
+s_bitcmp0_b64 src0, src1
+s_bitcmp1_b32 src0, src1
+s_bitcmp1_b64 src0, src1
+s_cmp_eq_i32 src0, src1
+s_cmp_eq_u32 src0, src1
+s_cmp_eq_u64 src0, src1
+s_cmp_ge_i32 src0, src1
+s_cmp_ge_u32 src0, src1
+s_cmp_gt_i32 src0, src1
+s_cmp_gt_u32 src0, src1
+s_cmp_le_i32 src0, src1
+s_cmp_le_u32 src0, src1
+s_cmp_lg_i32 src0, src1
+s_cmp_lg_u32 src0, src1
+s_cmp_lg_u64 src0, src1
+s_cmp_lt_i32 src0, src1
+s_cmp_lt_u32 src0, src1
+s_set_gpr_idx_on src0, src1
+s_setvskip src0, src1</pre>
+</div>
+</div>
+<div class="section" id="sopk">
+<h2><a class="toc-backref" href="#id10">SOPK</a><a class="headerlink" href="#sopk" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>s_addk_i32 dst, src0
+s_call_b64 dst, src0
+s_cbranch_i_fork src0, src1
+s_cmovk_i32 dst, src0
+s_cmpk_eq_i32 src0, src1
+s_cmpk_eq_u32 src0, src1
+s_cmpk_ge_i32 src0, src1
+s_cmpk_ge_u32 src0, src1
+s_cmpk_gt_i32 src0, src1
+s_cmpk_gt_u32 src0, src1
+s_cmpk_le_i32 src0, src1
+s_cmpk_le_u32 src0, src1
+s_cmpk_lg_i32 src0, src1
+s_cmpk_lg_u32 src0, src1
+s_cmpk_lt_i32 src0, src1
+s_cmpk_lt_u32 src0, src1
+s_getreg_b32 dst, src0
+s_movk_i32 dst, src0
+s_mulk_i32 dst, src0
+s_setreg_b32 dst, src0
+s_setreg_imm32_b32 dst, src0</pre>
+</div>
+</div>
+<div class="section" id="sopp">
+<h2><a class="toc-backref" href="#id11">SOPP</a><a class="headerlink" href="#sopp" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>s_barrier
+s_branch src0
+s_cbranch_cdbgsys src0
+s_cbranch_cdbgsys_and_user src0
+s_cbranch_cdbgsys_or_user src0
+s_cbranch_cdbguser src0
+s_cbranch_execnz src0
+s_cbranch_execz src0
+s_cbranch_scc0 src0
+s_cbranch_scc1 src0
+s_cbranch_vccnz src0
+s_cbranch_vccz src0
+s_decperflevel src0
+s_endpgm
+s_endpgm_ordered_ps_done
+s_endpgm_saved
+s_icache_inv
+s_incperflevel src0
+s_nop src0
+s_sendmsg src0
+s_sendmsghalt src0
+s_set_gpr_idx_mode src0
+s_set_gpr_idx_off
+s_sethalt src0
+s_setkill src0
+s_setprio src0
+s_sleep src0
+s_trap src0
+s_ttracedata
+s_waitcnt src0
+s_wakeup</pre>
+</div>
+</div>
+<div class="section" id="vintrp">
+<h2><a class="toc-backref" href="#id12">VINTRP</a><a class="headerlink" href="#vintrp" title="Permalink to this headline">¶</a></h2>
+<div class="highlight-python"><pre>v_interp_mov_f32 dst, src0, src1
+v_interp_p1_f32 dst, src0, src1
+v_interp_p2_f32 dst, src0, src1</pre>
+</div>
+</div>
+<div class="section" id="vop1">
+<h2><a class="toc-backref" href="#id13">VOP1</a><a class="headerlink" href="#vop1" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+v_bfrev_b32 dst, src0
+v_bfrev_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_bfrev_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ceil_f16 dst, src0
+v_ceil_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ceil_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ceil_f32 dst, src0
+v_ceil_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ceil_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ceil_f64 dst, src0
+v_clrexcp
+v_cos_f16 dst, src0
+v_cos_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cos_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cos_f32 dst, src0
+v_cos_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cos_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f16_f32 dst, src0
+v_cvt_f16_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f16_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f16_i16 dst, src0
+v_cvt_f16_i16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f16_i16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f16_u16 dst, src0
+v_cvt_f16_u16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f16_u16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_f16 dst, src0
+v_cvt_f32_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_f64 dst, src0
+v_cvt_f32_i32 dst, src0
+v_cvt_f32_i32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_i32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_u32 dst, src0
+v_cvt_f32_u32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_u32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_ubyte0 dst, src0
+v_cvt_f32_ubyte0_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_ubyte0_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_ubyte1 dst, src0
+v_cvt_f32_ubyte1_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_ubyte1_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_ubyte2 dst, src0
+v_cvt_f32_ubyte2_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_ubyte2_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f32_ubyte3 dst, src0
+v_cvt_f32_ubyte3_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_f32_ubyte3_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_f64_f32 dst, src0
+v_cvt_f64_i32 dst, src0
+v_cvt_f64_u32 dst, src0
+v_cvt_flr_i32_f32 dst, src0
+v_cvt_flr_i32_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_flr_i32_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_i16_f16 dst, src0
+v_cvt_i16_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_i16_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_i32_f32 dst, src0
+v_cvt_i32_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_i32_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_i32_f64 dst, src0
+v_cvt_norm_i16_f16 dst, src0
+v_cvt_norm_i16_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_norm_i16_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_norm_u16_f16 dst, src0
+v_cvt_norm_u16_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_norm_u16_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_off_f32_i4 dst, src0
+v_cvt_off_f32_i4_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_off_f32_i4_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_rpi_i32_f32 dst, src0
+v_cvt_rpi_i32_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_rpi_i32_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_u16_f16 dst, src0
+v_cvt_u16_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_u16_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_u32_f32 dst, src0
+v_cvt_u32_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cvt_u32_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_cvt_u32_f64 dst, src0
+v_exp_f16 dst, src0
+v_exp_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_exp_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_exp_f32 dst, src0
+v_exp_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_exp_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_exp_legacy_f32 dst, src0
+v_exp_legacy_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_exp_legacy_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ffbh_i32 dst, src0
+v_ffbh_i32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ffbh_i32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ffbh_u32 dst, src0
+v_ffbh_u32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ffbh_u32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_ffbl_b32 dst, src0
+v_ffbl_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ffbl_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_floor_f16 dst, src0
+v_floor_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_floor_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_floor_f32 dst, src0
+v_floor_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_floor_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_floor_f64 dst, src0
+v_fract_f16 dst, src0
+v_fract_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_fract_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_fract_f32 dst, src0
+v_fract_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_fract_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_fract_f64 dst, src0
+v_frexp_exp_i16_f16 dst, src0
+v_frexp_exp_i16_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_frexp_exp_i16_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_frexp_exp_i32_f32 dst, src0
+v_frexp_exp_i32_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_frexp_exp_i32_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_frexp_exp_i32_f64 dst, src0
+v_frexp_mant_f16 dst, src0
+v_frexp_mant_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_frexp_mant_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_frexp_mant_f32 dst, src0
+v_frexp_mant_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_frexp_mant_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_frexp_mant_f64 dst, src0
+v_log_f16 dst, src0
+v_log_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_log_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_log_f32 dst, src0
+v_log_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_log_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_log_legacy_f32 dst, src0
+v_log_legacy_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_log_legacy_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_mov_b32 dst, src0
+v_mov_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mov_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_mov_fed_b32 dst, src0
+v_mov_fed_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mov_fed_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_nop
+v_not_b32 dst, src0
+v_not_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_not_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rcp_f16 dst, src0
+v_rcp_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rcp_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rcp_f32 dst, src0
+v_rcp_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rcp_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rcp_f64 dst, src0
+v_rcp_iflag_f32 dst, src0
+v_rcp_iflag_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rcp_iflag_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_readfirstlane_b32 dst, src0
+v_rndne_f16 dst, src0
+v_rndne_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rndne_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rndne_f32 dst, src0
+v_rndne_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rndne_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rndne_f64 dst, src0
+v_rsq_f16 dst, src0
+v_rsq_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rsq_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rsq_f32 dst, src0
+v_rsq_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_rsq_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_rsq_f64 dst, src0
+v_sat_pk_u8_i16 dst, src0
+v_sat_pk_u8_i16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sat_pk_u8_i16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_screen_partition_4se_b32 dst, src0
+v_screen_partition_4se_b32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_screen_partition_4se_b32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_sin_f16 dst, src0
+v_sin_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sin_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_sin_f32 dst, src0
+v_sin_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sin_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_sqrt_f16 dst, src0
+v_sqrt_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sqrt_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_sqrt_f32 dst, src0
+v_sqrt_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sqrt_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_sqrt_f64 dst, src0
+v_swap_b32 dst, src0
+v_trunc_f16 dst, src0
+v_trunc_f16_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_trunc_f16_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_trunc_f32 dst, src0
+v_trunc_f32_dpp dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_trunc_f32_sdwa dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a>
+v_trunc_f64 dst, src0
+</pre>
+</div>
+<div class="section" id="vop2">
+<h2><a class="toc-backref" href="#id14">VOP2</a><a class="headerlink" href="#vop2" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+v_add_co_u32 dst0, dst1, src0, src1
+v_add_co_u32_dpp dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_add_co_u32_sdwa dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_add_f16 dst, src0, src1
+v_add_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_add_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_add_f32 dst, src0, src1
+v_add_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_add_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_add_u16 dst, src0, src1
+v_add_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_add_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_add_u32 dst, src0, src1
+v_add_u32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_add_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_addc_co_u32 dst0, dst1, src0, src1, src2
+v_addc_co_u32_dpp dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_addc_co_u32_sdwa dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_and_b32 dst, src0, src1
+v_and_b32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_and_b32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_ashrrev_i16 dst, src0, src1
+v_ashrrev_i16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ashrrev_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_ashrrev_i32 dst, src0, src1
+v_ashrrev_i32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ashrrev_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cndmask_b32 dst, src0, src1, src2
+v_cndmask_b32_dpp dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_cndmask_b32_sdwa dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_ldexp_f16 dst, src0, src1
+v_ldexp_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_ldexp_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_lshlrev_b16 dst, src0, src1
+v_lshlrev_b16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_lshlrev_b16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_lshlrev_b32 dst, src0, src1
+v_lshlrev_b32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_lshlrev_b32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_lshrrev_b16 dst, src0, src1
+v_lshrrev_b16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_lshrrev_b16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_lshrrev_b32 dst, src0, src1
+v_lshrrev_b32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_lshrrev_b32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mac_f16 dst, src0, src1
+v_mac_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mac_f32 dst, src0, src1
+v_mac_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_madak_f16 dst, src0, src1, src2
+v_madak_f32 dst, src0, src1, src2
+v_madmk_f16 dst, src0, src1, src2
+v_madmk_f32 dst, src0, src1, src2
+v_max_f16 dst, src0, src1
+v_max_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_max_f32 dst, src0, src1
+v_max_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_max_i16 dst, src0, src1
+v_max_i16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_max_i32 dst, src0, src1
+v_max_i32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_max_u16 dst, src0, src1
+v_max_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_max_u32 dst, src0, src1
+v_max_u32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_max_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_f16 dst, src0, src1
+v_min_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_f32 dst, src0, src1
+v_min_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_i16 dst, src0, src1
+v_min_i16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_i32 dst, src0, src1
+v_min_i32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_u16 dst, src0, src1
+v_min_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_min_u32 dst, src0, src1
+v_min_u32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_min_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_f16 dst, src0, src1
+v_mul_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_f32 dst, src0, src1
+v_mul_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_hi_i32_i24 dst, src0, src1
+v_mul_hi_i32_i24_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_hi_i32_i24_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_hi_u32_u24 dst, src0, src1
+v_mul_hi_u32_u24_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_hi_u32_u24_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_i32_i24 dst, src0, src1
+v_mul_i32_i24_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_i32_i24_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_legacy_f32 dst, src0, src1
+v_mul_legacy_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_legacy_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_lo_u16 dst, src0, src1
+v_mul_lo_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_lo_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_mul_u32_u24 dst, src0, src1
+v_mul_u32_u24_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_mul_u32_u24_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_or_b32 dst, src0, src1
+v_or_b32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_or_b32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_sub_co_u32 dst0, dst1, src0, src1
+v_sub_co_u32_dpp dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sub_co_u32_sdwa dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_sub_f16 dst, src0, src1
+v_sub_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sub_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_sub_f32 dst, src0, src1
+v_sub_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sub_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_sub_u16 dst, src0, src1
+v_sub_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sub_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_sub_u32 dst, src0, src1
+v_sub_u32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_sub_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subb_co_u32 dst0, dst1, src0, src1, src2
+v_subb_co_u32_dpp dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subb_co_u32_sdwa dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subbrev_co_u32 dst0, dst1, src0, src1, src2
+v_subbrev_co_u32_dpp dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subbrev_co_u32_sdwa dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subrev_co_u32 dst0, dst1, src0, src1
+v_subrev_co_u32_dpp dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subrev_co_u32_sdwa dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subrev_f16 dst, src0, src1
+v_subrev_f16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subrev_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subrev_f32 dst, src0, src1
+v_subrev_f32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subrev_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subrev_u16 dst, src0, src1
+v_subrev_u16_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subrev_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_subrev_u32 dst, src0, src1
+v_subrev_u32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_subrev_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_xor_b32 dst, src0, src1
+v_xor_b32_dpp dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dpp-ctrl"><em>dpp_ctrl</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-row-mask"><em>row_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bank-mask"><em>bank_mask</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-bound-ctrl"><em>bound_ctrl</em></a>
+v_xor_b32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-sel"><em>dst_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-dst-unused"><em>dst_unused</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+</pre>
+</div>
+<div class="section" id="vop3">
+<h2><a class="toc-backref" href="#id15">VOP3</a><a class="headerlink" href="#vop3" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+v_add3_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_co_u32_e64 dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_lshl_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_add_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_addc_co_u32_e64 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_alignbit_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_alignbyte_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_and_b32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_and_or_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ashrrev_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ashrrev_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ashrrev_i64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bcnt_u32_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bfe_i32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bfe_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bfi_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bfm_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_bfrev_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ceil_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ceil_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ceil_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_clrexcp_e64 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_class_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_class_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_class_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_eq_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_f_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ge_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_gt_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_le_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lg_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lg_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lg_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_lt_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ne_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_neq_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_neq_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_neq_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nge_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nge_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nge_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ngt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ngt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_ngt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nle_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nle_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nle_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlg_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlg_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlg_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_nlt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_o_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_o_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_o_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_t_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_tru_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_tru_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_tru_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_u_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_u_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmp_u_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_class_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_class_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_class_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_eq_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_f_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ge_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_gt_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_le_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lg_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lg_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lg_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_lt_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ne_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_neq_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_neq_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_neq_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nge_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nge_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nge_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ngt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ngt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_ngt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nle_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nle_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nle_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlg_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlg_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlg_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlt_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlt_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_nlt_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_o_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_o_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_o_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_i64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_t_u64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_tru_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_tru_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_tru_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_u_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_u_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cmpx_u_f64_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cndmask_b32_e64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cos_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cos_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cubeid_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cubema_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cubesc_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cubetc_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f16_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f16_i16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f16_u16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_i32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_u32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_ubyte0_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_ubyte1_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_ubyte2_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f32_ubyte3_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f64_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f64_i32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_f64_u32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_flr_i32_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_i16_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_i32_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_i32_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_norm_i16_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_norm_u16_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_off_f32_i4_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pk_i16_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pk_u16_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pk_u8_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pkaccum_u8_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pknorm_i16_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pknorm_i16_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pknorm_u16_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pknorm_u16_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_pkrtz_f16_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_rpi_i32_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_u16_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_u32_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_cvt_u32_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fixup_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fixup_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fixup_f64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fixup_legacy_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fmas_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_fmas_f64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_scale_f32 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_div_scale_f64 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_exp_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_exp_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_exp_legacy_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ffbh_i32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ffbh_u32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ffbl_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_floor_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_floor_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_floor_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fma_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fma_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fma_f64 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fma_legacy_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fract_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fract_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_fract_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_exp_i16_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_exp_i32_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_exp_i32_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_mant_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_mant_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_frexp_mant_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_mov_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p1_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p1ll_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-high"><em>high</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p1lv_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-high"><em>high</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p2_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-high"><em>high</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p2_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_interp_p2_legacy_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-high"><em>high</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ldexp_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ldexp_f32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_ldexp_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lerp_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_log_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_log_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_log_legacy_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshl_add_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshl_or_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshlrev_b16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshlrev_b32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshlrev_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshrrev_b16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshrrev_b32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_lshrrev_b64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mac_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mac_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_i32_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_i32_i24 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_i64_i32 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_legacy_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_legacy_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_legacy_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_legacy_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_u32_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_u32_u24 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mad_u64_u32 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_i32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max3_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_max_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mbcnt_hi_u32_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mbcnt_lo_u32_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_i32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_med3_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_i32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min3_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_i16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_i32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_min_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mov_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mov_fed_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mqsad_pk_u16_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mqsad_u32_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_msad_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_hi_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_hi_i32_i24_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_hi_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_hi_u32_u24_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_i32_i24_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_legacy_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_lo_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_lo_u32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_mul_u32_u24_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_nop_e64 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_not_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_or3_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_or_b32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_pack_b32_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_perm_b32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_qsad_pk_u16_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rcp_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rcp_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rcp_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rcp_iflag_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_readlane_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rndne_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rndne_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rndne_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rsq_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rsq_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_rsq_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sad_hi_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sad_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sad_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sad_u8 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sat_pk_u8_i16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_screen_partition_4se_b32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sin_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sin_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sqrt_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sqrt_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sqrt_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_co_u32_e64 dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-vop3-op-sel"><em>vop3_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_i32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_sub_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subb_co_u32_e64 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subbrev_co_u32_e64 dst0, dst1, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subrev_co_u32_e64 dst0, dst1, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subrev_f16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subrev_f32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subrev_u16_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_subrev_u32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_trig_preop_f64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_trunc_f16_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_trunc_f32_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_trunc_f64_e64 dst, src0 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_writelane_b32 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_xad_u32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+v_xor_b32_e64 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-omod"><em>omod</em></a>
+</pre>
+</div>
+<div class="section" id="vop3p">
+<h2><a class="toc-backref" href="#id16">VOP3P</a><a class="headerlink" href="#vop3p" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+v_mad_mix_f32 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel"><em>mad_mix_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel-hi"><em>mad_mix_op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_mad_mixhi_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel"><em>mad_mix_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel-hi"><em>mad_mix_op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_mad_mixlo_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel"><em>mad_mix_op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-mad-mix-op-sel-hi"><em>mad_mix_op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_add_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-lo"><em>neg_lo</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-hi"><em>neg_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_add_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_add_u16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_ashrrev_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_fma_f16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-lo"><em>neg_lo</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-hi"><em>neg_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_lshlrev_b16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_lshrrev_b16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_mad_i16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_mad_u16 dst, src0, src1, src2 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_max_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-lo"><em>neg_lo</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-hi"><em>neg_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_max_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_max_u16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_min_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-lo"><em>neg_lo</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-hi"><em>neg_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_min_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_min_u16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_mul_f16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-lo"><em>neg_lo</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-neg-hi"><em>neg_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_mul_lo_u16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>
+v_pk_sub_i16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+v_pk_sub_u16 dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel"><em>op_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-clamp"><em>clamp</em></a>
+</pre>
+</div>
+<div class="section" id="vopc">
+<h2><a class="toc-backref" href="#id17">VOPC</a><a class="headerlink" href="#vopc" title="Permalink to this headline">¶</a></h2>
+<pre class="literal-block">
+v_cmp_class_f16 dst, src0, src1
+v_cmp_class_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_class_f32 dst, src0, src1
+v_cmp_class_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_class_f64 dst, src0, src1
+v_cmp_eq_f16 dst, src0, src1
+v_cmp_eq_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_f32 dst, src0, src1
+v_cmp_eq_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_f64 dst, src0, src1
+v_cmp_eq_i16 dst, src0, src1
+v_cmp_eq_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_i32 dst, src0, src1
+v_cmp_eq_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_i64 dst, src0, src1
+v_cmp_eq_u16 dst, src0, src1
+v_cmp_eq_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_u32 dst, src0, src1
+v_cmp_eq_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_eq_u64 dst, src0, src1
+v_cmp_f_f16 dst, src0, src1
+v_cmp_f_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_f32 dst, src0, src1
+v_cmp_f_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_f64 dst, src0, src1
+v_cmp_f_i16 dst, src0, src1
+v_cmp_f_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_i32 dst, src0, src1
+v_cmp_f_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_i64 dst, src0, src1
+v_cmp_f_u16 dst, src0, src1
+v_cmp_f_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_u32 dst, src0, src1
+v_cmp_f_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_f_u64 dst, src0, src1
+v_cmp_ge_f16 dst, src0, src1
+v_cmp_ge_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_f32 dst, src0, src1
+v_cmp_ge_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_f64 dst, src0, src1
+v_cmp_ge_i16 dst, src0, src1
+v_cmp_ge_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_i32 dst, src0, src1
+v_cmp_ge_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_i64 dst, src0, src1
+v_cmp_ge_u16 dst, src0, src1
+v_cmp_ge_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_u32 dst, src0, src1
+v_cmp_ge_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ge_u64 dst, src0, src1
+v_cmp_gt_f16 dst, src0, src1
+v_cmp_gt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_f32 dst, src0, src1
+v_cmp_gt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_f64 dst, src0, src1
+v_cmp_gt_i16 dst, src0, src1
+v_cmp_gt_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_i32 dst, src0, src1
+v_cmp_gt_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_i64 dst, src0, src1
+v_cmp_gt_u16 dst, src0, src1
+v_cmp_gt_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_u32 dst, src0, src1
+v_cmp_gt_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_gt_u64 dst, src0, src1
+v_cmp_le_f16 dst, src0, src1
+v_cmp_le_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_f32 dst, src0, src1
+v_cmp_le_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_f64 dst, src0, src1
+v_cmp_le_i16 dst, src0, src1
+v_cmp_le_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_i32 dst, src0, src1
+v_cmp_le_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_i64 dst, src0, src1
+v_cmp_le_u16 dst, src0, src1
+v_cmp_le_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_u32 dst, src0, src1
+v_cmp_le_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_le_u64 dst, src0, src1
+v_cmp_lg_f16 dst, src0, src1
+v_cmp_lg_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lg_f32 dst, src0, src1
+v_cmp_lg_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lg_f64 dst, src0, src1
+v_cmp_lt_f16 dst, src0, src1
+v_cmp_lt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_f32 dst, src0, src1
+v_cmp_lt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_f64 dst, src0, src1
+v_cmp_lt_i16 dst, src0, src1
+v_cmp_lt_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_i32 dst, src0, src1
+v_cmp_lt_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_i64 dst, src0, src1
+v_cmp_lt_u16 dst, src0, src1
+v_cmp_lt_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_u32 dst, src0, src1
+v_cmp_lt_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_lt_u64 dst, src0, src1
+v_cmp_ne_i16 dst, src0, src1
+v_cmp_ne_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ne_i32 dst, src0, src1
+v_cmp_ne_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ne_i64 dst, src0, src1
+v_cmp_ne_u16 dst, src0, src1
+v_cmp_ne_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ne_u32 dst, src0, src1
+v_cmp_ne_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ne_u64 dst, src0, src1
+v_cmp_neq_f16 dst, src0, src1
+v_cmp_neq_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_neq_f32 dst, src0, src1
+v_cmp_neq_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_neq_f64 dst, src0, src1
+v_cmp_nge_f16 dst, src0, src1
+v_cmp_nge_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nge_f32 dst, src0, src1
+v_cmp_nge_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nge_f64 dst, src0, src1
+v_cmp_ngt_f16 dst, src0, src1
+v_cmp_ngt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ngt_f32 dst, src0, src1
+v_cmp_ngt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_ngt_f64 dst, src0, src1
+v_cmp_nle_f16 dst, src0, src1
+v_cmp_nle_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nle_f32 dst, src0, src1
+v_cmp_nle_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nle_f64 dst, src0, src1
+v_cmp_nlg_f16 dst, src0, src1
+v_cmp_nlg_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nlg_f32 dst, src0, src1
+v_cmp_nlg_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nlg_f64 dst, src0, src1
+v_cmp_nlt_f16 dst, src0, src1
+v_cmp_nlt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nlt_f32 dst, src0, src1
+v_cmp_nlt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_nlt_f64 dst, src0, src1
+v_cmp_o_f16 dst, src0, src1
+v_cmp_o_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_o_f32 dst, src0, src1
+v_cmp_o_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_o_f64 dst, src0, src1
+v_cmp_t_i16 dst, src0, src1
+v_cmp_t_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_t_i32 dst, src0, src1
+v_cmp_t_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_t_i64 dst, src0, src1
+v_cmp_t_u16 dst, src0, src1
+v_cmp_t_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_t_u32 dst, src0, src1
+v_cmp_t_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_t_u64 dst, src0, src1
+v_cmp_tru_f16 dst, src0, src1
+v_cmp_tru_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_tru_f32 dst, src0, src1
+v_cmp_tru_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_tru_f64 dst, src0, src1
+v_cmp_u_f16 dst, src0, src1
+v_cmp_u_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_u_f32 dst, src0, src1
+v_cmp_u_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmp_u_f64 dst, src0, src1
+v_cmpx_class_f16 dst, src0, src1
+v_cmpx_class_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_class_f32 dst, src0, src1
+v_cmpx_class_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_class_f64 dst, src0, src1
+v_cmpx_eq_f16 dst, src0, src1
+v_cmpx_eq_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_f32 dst, src0, src1
+v_cmpx_eq_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_f64 dst, src0, src1
+v_cmpx_eq_i16 dst, src0, src1
+v_cmpx_eq_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_i32 dst, src0, src1
+v_cmpx_eq_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_i64 dst, src0, src1
+v_cmpx_eq_u16 dst, src0, src1
+v_cmpx_eq_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_u32 dst, src0, src1
+v_cmpx_eq_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_eq_u64 dst, src0, src1
+v_cmpx_f_f16 dst, src0, src1
+v_cmpx_f_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_f32 dst, src0, src1
+v_cmpx_f_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_f64 dst, src0, src1
+v_cmpx_f_i16 dst, src0, src1
+v_cmpx_f_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_i32 dst, src0, src1
+v_cmpx_f_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_i64 dst, src0, src1
+v_cmpx_f_u16 dst, src0, src1
+v_cmpx_f_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_u32 dst, src0, src1
+v_cmpx_f_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_f_u64 dst, src0, src1
+v_cmpx_ge_f16 dst, src0, src1
+v_cmpx_ge_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_f32 dst, src0, src1
+v_cmpx_ge_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_f64 dst, src0, src1
+v_cmpx_ge_i16 dst, src0, src1
+v_cmpx_ge_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_i32 dst, src0, src1
+v_cmpx_ge_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_i64 dst, src0, src1
+v_cmpx_ge_u16 dst, src0, src1
+v_cmpx_ge_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_u32 dst, src0, src1
+v_cmpx_ge_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ge_u64 dst, src0, src1
+v_cmpx_gt_f16 dst, src0, src1
+v_cmpx_gt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_f32 dst, src0, src1
+v_cmpx_gt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_f64 dst, src0, src1
+v_cmpx_gt_i16 dst, src0, src1
+v_cmpx_gt_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_i32 dst, src0, src1
+v_cmpx_gt_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_i64 dst, src0, src1
+v_cmpx_gt_u16 dst, src0, src1
+v_cmpx_gt_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_u32 dst, src0, src1
+v_cmpx_gt_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_gt_u64 dst, src0, src1
+v_cmpx_le_f16 dst, src0, src1
+v_cmpx_le_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_f32 dst, src0, src1
+v_cmpx_le_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_f64 dst, src0, src1
+v_cmpx_le_i16 dst, src0, src1
+v_cmpx_le_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_i32 dst, src0, src1
+v_cmpx_le_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_i64 dst, src0, src1
+v_cmpx_le_u16 dst, src0, src1
+v_cmpx_le_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_u32 dst, src0, src1
+v_cmpx_le_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_le_u64 dst, src0, src1
+v_cmpx_lg_f16 dst, src0, src1
+v_cmpx_lg_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lg_f32 dst, src0, src1
+v_cmpx_lg_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lg_f64 dst, src0, src1
+v_cmpx_lt_f16 dst, src0, src1
+v_cmpx_lt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_f32 dst, src0, src1
+v_cmpx_lt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_f64 dst, src0, src1
+v_cmpx_lt_i16 dst, src0, src1
+v_cmpx_lt_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_i32 dst, src0, src1
+v_cmpx_lt_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_i64 dst, src0, src1
+v_cmpx_lt_u16 dst, src0, src1
+v_cmpx_lt_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_u32 dst, src0, src1
+v_cmpx_lt_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_lt_u64 dst, src0, src1
+v_cmpx_ne_i16 dst, src0, src1
+v_cmpx_ne_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ne_i32 dst, src0, src1
+v_cmpx_ne_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ne_i64 dst, src0, src1
+v_cmpx_ne_u16 dst, src0, src1
+v_cmpx_ne_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ne_u32 dst, src0, src1
+v_cmpx_ne_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ne_u64 dst, src0, src1
+v_cmpx_neq_f16 dst, src0, src1
+v_cmpx_neq_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_neq_f32 dst, src0, src1
+v_cmpx_neq_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_neq_f64 dst, src0, src1
+v_cmpx_nge_f16 dst, src0, src1
+v_cmpx_nge_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nge_f32 dst, src0, src1
+v_cmpx_nge_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nge_f64 dst, src0, src1
+v_cmpx_ngt_f16 dst, src0, src1
+v_cmpx_ngt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ngt_f32 dst, src0, src1
+v_cmpx_ngt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_ngt_f64 dst, src0, src1
+v_cmpx_nle_f16 dst, src0, src1
+v_cmpx_nle_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nle_f32 dst, src0, src1
+v_cmpx_nle_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nle_f64 dst, src0, src1
+v_cmpx_nlg_f16 dst, src0, src1
+v_cmpx_nlg_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nlg_f32 dst, src0, src1
+v_cmpx_nlg_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nlg_f64 dst, src0, src1
+v_cmpx_nlt_f16 dst, src0, src1
+v_cmpx_nlt_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nlt_f32 dst, src0, src1
+v_cmpx_nlt_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_nlt_f64 dst, src0, src1
+v_cmpx_o_f16 dst, src0, src1
+v_cmpx_o_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_o_f32 dst, src0, src1
+v_cmpx_o_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_o_f64 dst, src0, src1
+v_cmpx_t_i16 dst, src0, src1
+v_cmpx_t_i16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_t_i32 dst, src0, src1
+v_cmpx_t_i32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_t_i64 dst, src0, src1
+v_cmpx_t_u16 dst, src0, src1
+v_cmpx_t_u16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_t_u32 dst, src0, src1
+v_cmpx_t_u32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_t_u64 dst, src0, src1
+v_cmpx_tru_f16 dst, src0, src1
+v_cmpx_tru_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_tru_f32 dst, src0, src1
+v_cmpx_tru_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_tru_f64 dst, src0, src1
+v_cmpx_u_f16 dst, src0, src1
+v_cmpx_u_f16_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_u_f32 dst, src0, src1
+v_cmpx_u_f32_sdwa dst, src0, src1 <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src0-sel"><em>src0_sel</em></a> <a class="reference internal" href="AMDGPUOperandSyntax.html#amdgpu-synid-src1-sel"><em>src1_sel</em></a>
+v_cmpx_u_f64 dst, src0, src1
+</pre>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="AMDGPUOperandSyntax.html" title="Syntax of AMDGPU Assembler Operands and Modifiers"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX8.html" title="Syntax of GFX8 Instructions"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ <li><a href="AMDGPUUsage.html" >User Guide for AMDGPU Backend</a> »</li>
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/AMDGPUOperandSyntax.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/AMDGPUOperandSyntax.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/AMDGPUOperandSyntax.html (added)
+++ www-releases/trunk/7.0.1/docs/AMDGPUOperandSyntax.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,1731 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Syntax of AMDGPU Assembler Operands and Modifiers — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="up" title="User Guide for AMDGPU Backend" href="AMDGPUUsage.html" />
+ <link rel="next" title="Stack maps and patch points in LLVM" href="StackMaps.html" />
+ <link rel="prev" title="Syntax of GFX9 Instructions" href="AMDGPUAsmGFX9.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="StackMaps.html" title="Stack maps and patch points in LLVM"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX9.html" title="Syntax of GFX9 Instructions"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ <li><a href="AMDGPUUsage.html" accesskey="U">User Guide for AMDGPU Backend</a> »</li>
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="syntax-of-amdgpu-assembler-operands-and-modifiers">
+<h1>Syntax of AMDGPU Assembler Operands and Modifiers<a class="headerlink" href="#syntax-of-amdgpu-assembler-operands-and-modifiers" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#conventions" id="id21">Conventions</a></li>
+<li><a class="reference internal" href="#operands" id="id22">Operands</a></li>
+<li><a class="reference internal" href="#modifiers" id="id23">Modifiers</a><ul>
+<li><a class="reference internal" href="#ds-modifiers" id="id24">DS Modifiers</a><ul>
+<li><a class="reference internal" href="#ds-offset8" id="id25">ds_offset8</a></li>
+<li><a class="reference internal" href="#ds-offset16" id="id26">ds_offset16</a></li>
+<li><a class="reference internal" href="#sw-offset16" id="id27">sw_offset16</a></li>
+<li><a class="reference internal" href="#gds" id="id28">gds</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#exp-modifiers" id="id29">EXP Modifiers</a><ul>
+<li><a class="reference internal" href="#done" id="id30">done</a></li>
+<li><a class="reference internal" href="#compr" id="id31">compr</a></li>
+<li><a class="reference internal" href="#vm" id="id32">vm</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#flat-modifiers" id="id33">FLAT Modifiers</a><ul>
+<li><a class="reference internal" href="#flat-offset12" id="id34">flat_offset12</a></li>
+<li><a class="reference internal" href="#flat-offset13" id="id35">flat_offset13</a></li>
+<li><a class="reference internal" href="#glc" id="id36">glc</a></li>
+<li><a class="reference internal" href="#slc" id="id37">slc</a></li>
+<li><a class="reference internal" href="#tfe" id="id38">tfe</a></li>
+<li><a class="reference internal" href="#nv" id="id39">nv</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#mimg-modifiers" id="id40">MIMG Modifiers</a><ul>
+<li><a class="reference internal" href="#dmask" id="id41">dmask</a></li>
+<li><a class="reference internal" href="#unorm" id="id42">unorm</a></li>
+<li><a class="reference internal" href="#id1" id="id43">glc</a></li>
+<li><a class="reference internal" href="#id2" id="id44">slc</a></li>
+<li><a class="reference internal" href="#r128" id="id45">r128</a></li>
+<li><a class="reference internal" href="#id3" id="id46">tfe</a></li>
+<li><a class="reference internal" href="#lwe" id="id47">lwe</a></li>
+<li><a class="reference internal" href="#da" id="id48">da</a></li>
+<li><a class="reference internal" href="#d16" id="id49">d16</a></li>
+<li><a class="reference internal" href="#a16" id="id50">a16</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#miscellaneous-modifiers" id="id51">Miscellaneous Modifiers</a><ul>
+<li><a class="reference internal" href="#amdgpu-synid-glc" id="id52">glc</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-slc" id="id53">slc</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-tfe" id="id54">tfe</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-nv" id="id55">nv</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#mubuf-mtbuf-modifiers" id="id56">MUBUF/MTBUF Modifiers</a><ul>
+<li><a class="reference internal" href="#amdgpu-synid-idxen" id="id57">idxen</a></li>
+<li><a class="reference internal" href="#offen" id="id58">offen</a></li>
+<li><a class="reference internal" href="#addr64" id="id59">addr64</a></li>
+<li><a class="reference internal" href="#buf-offset12" id="id60">buf_offset12</a></li>
+<li><a class="reference internal" href="#id8" id="id61">glc</a></li>
+<li><a class="reference internal" href="#id9" id="id62">slc</a></li>
+<li><a class="reference internal" href="#lds" id="id63">lds</a></li>
+<li><a class="reference internal" href="#id10" id="id64">tfe</a></li>
+<li><a class="reference internal" href="#dfmt" id="id65">dfmt</a></li>
+<li><a class="reference internal" href="#nfmt" id="id66">nfmt</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#smrd-smem-modifiers" id="id67">SMRD/SMEM Modifiers</a><ul>
+<li><a class="reference internal" href="#id11" id="id68">glc</a></li>
+<li><a class="reference internal" href="#id12" id="id69">nv</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vintrp-modifiers" id="id70">VINTRP Modifiers</a><ul>
+<li><a class="reference internal" href="#high" id="id71">high</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop1-vop2-dpp-modifiers" id="id72">VOP1/VOP2 DPP Modifiers</a><ul>
+<li><a class="reference internal" href="#dpp-ctrl" id="id73">dpp_ctrl</a></li>
+<li><a class="reference internal" href="#row-mask" id="id74">row_mask</a></li>
+<li><a class="reference internal" href="#bank-mask" id="id75">bank_mask</a></li>
+<li><a class="reference internal" href="#bound-ctrl" id="id76">bound_ctrl</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop1-vop2-vopc-sdwa-modifiers" id="id77">VOP1/VOP2/VOPC SDWA Modifiers</a><ul>
+<li><a class="reference internal" href="#clamp" id="id78">clamp</a></li>
+<li><a class="reference internal" href="#omod" id="id79">omod</a></li>
+<li><a class="reference internal" href="#dst-sel" id="id80">dst_sel</a></li>
+<li><a class="reference internal" href="#dst-unused" id="id81">dst_unused</a></li>
+<li><a class="reference internal" href="#src0-sel" id="id82">src0_sel</a></li>
+<li><a class="reference internal" href="#src1-sel" id="id83">src1_sel</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop1-vop2-vopc-sdwa-operand-modifiers" id="id84">VOP1/VOP2/VOPC SDWA Operand Modifiers</a><ul>
+<li><a class="reference internal" href="#abs" id="id85">abs</a></li>
+<li><a class="reference internal" href="#neg" id="id86">neg</a></li>
+<li><a class="reference internal" href="#sext" id="id87">sext</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop3-modifiers" id="id88">VOP3 Modifiers</a><ul>
+<li><a class="reference internal" href="#vop3-op-sel" id="id89">vop3_op_sel</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-clamp" id="id90">clamp</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-omod" id="id91">omod</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop3-operand-modifiers" id="id92">VOP3 Operand Modifiers</a><ul>
+<li><a class="reference internal" href="#amdgpu-synid-abs" id="id93">abs</a></li>
+<li><a class="reference internal" href="#amdgpu-synid-neg" id="id94">neg</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop3p-modifiers" id="id95">VOP3P Modifiers</a><ul>
+<li><a class="reference internal" href="#op-sel" id="id96">op_sel</a></li>
+<li><a class="reference internal" href="#op-sel-hi" id="id97">op_sel_hi</a></li>
+<li><a class="reference internal" href="#neg-lo" id="id98">neg_lo</a></li>
+<li><a class="reference internal" href="#neg-hi" id="id99">neg_hi</a></li>
+<li><a class="reference internal" href="#id17" id="id100">clamp</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#vop3p-v-mad-mix-modifiers" id="id101">VOP3P V_MAD_MIX Modifiers</a><ul>
+<li><a class="reference internal" href="#mad-mix-op-sel" id="id102">mad_mix_op_sel</a></li>
+<li><a class="reference internal" href="#mad-mix-op-sel-hi" id="id103">mad_mix_op_sel_hi</a></li>
+<li><a class="reference internal" href="#id18" id="id104">abs</a></li>
+<li><a class="reference internal" href="#id19" id="id105">neg</a></li>
+<li><a class="reference internal" href="#id20" id="id106">clamp</a></li>
+</ul>
+</li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="conventions">
+<h2><a class="toc-backref" href="#id21">Conventions</a><a class="headerlink" href="#conventions" title="Permalink to this headline">¶</a></h2>
+<p>The following conventions are used in syntax description:</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="24%" />
+<col width="76%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Notation</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>{0..N}</td>
+<td>Any integer value in the range from 0 to N (inclusive).
+Unless stated otherwise, this value may be specified as
+either a literal or an llvm expression.</td>
+</tr>
+<tr class="row-odd"><td>&lt;x&gt;</td>
+<td>Syntax and meaning of <em>&lt;x&gt;</em> is explained elsewhere.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="operands">
+<span id="amdgpu-syn-operands"></span><h2><a class="toc-backref" href="#id22">Operands</a><a class="headerlink" href="#operands" title="Permalink to this headline">¶</a></h2>
+<p>TBD</p>
+</div>
+<div class="section" id="modifiers">
+<span id="amdgpu-syn-modifiers"></span><h2><a class="toc-backref" href="#id23">Modifiers</a><a class="headerlink" href="#modifiers" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="ds-modifiers">
+<h3><a class="toc-backref" href="#id24">DS Modifiers</a><a class="headerlink" href="#ds-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="ds-offset8">
+<span id="amdgpu-synid-ds-offset8"></span><h4><a class="toc-backref" href="#id25">ds_offset8</a><a class="headerlink" href="#ds-offset8" title="Permalink to this headline">¶</a></h4>
+<p>Specifies an immediate unsigned 8-bit offset, in bytes. The default value is 0.</p>
+<p>Used with DS instructions which have 2 addresses.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{0..0xFF}</td>
+<td>Specifies an 8-bit offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="ds-offset16">
+<span id="amdgpu-synid-ds-offset16"></span><h4><a class="toc-backref" href="#id26">ds_offset16</a><a class="headerlink" href="#ds-offset16" title="Permalink to this headline">¶</a></h4>
+<p>Specifies an immediate unsigned 16-bit offset, in bytes. The default value is 0.</p>
+<p>Used with DS instructions which have 1 address.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{0..0xFFFF}</td>
+<td>Specifies a 16-bit offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="sw-offset16">
+<span id="amdgpu-synid-sw-offset16"></span><h4><a class="toc-backref" href="#id27">sw_offset16</a><a class="headerlink" href="#sw-offset16" title="Permalink to this headline">¶</a></h4>
+<p>This is a special modifier which may be used with the <em>ds_swizzle_b32</em> instruction only.
+Specifies a swizzle pattern in numeric or symbolic form. The default value is 0.</p>
+<p>See AMD documentation for more information.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="52%" />
+<col width="48%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{0..0xFFFF}</td>
+<td>Specifies a 16-bit swizzle pattern
+in a numeric form.</td>
+</tr>
+<tr class="row-odd"><td>offset:swizzle(QUAD_PERM,{0..3},{0..3},{0..3},{0..3})</td>
+<td>Specifies a quad permute mode pattern; each
+number is a lane id.</td>
+</tr>
+<tr class="row-even"><td>offset:swizzle(BITMASK_PERM, “&lt;mask&gt;”)</td>
+<td><p class="first">Specifies a bitmask permute mode pattern
+which converts a 5-bit lane id to another
+lane id with which the lane interacts.</p>
+<p>&lt;mask&gt; is a 5-character sequence which
+specifies how to transform the bits of the
+lane id. The following characters are allowed:</p>
+<blockquote class="last">
+<div><ul class="simple">
+<li>“0” - set bit to 0.</li>
+<li>“1” - set bit to 1.</li>
+<li>“p” - preserve bit.</li>
+<li>“i” - invert bit.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>offset:swizzle(BROADCAST,{2..32},{0..N})</td>
+<td><p class="first">Specifies a broadcast mode.
+Broadcasts the value of any particular lane to
+all lanes in its group.</p>
+<p>The first numeric parameter is a group
+size and must be equal to 2, 4, 8, 16 or 32.</p>
+<p class="last">The second numeric parameter is an index of the
+lane being broadcast. The index must not exceed
+the group size.</p>
+</td>
+</tr>
+<tr class="row-even"><td>offset:swizzle(SWAP,{1..16})</td>
+<td>Specifies a swap mode.
+Swaps the neighboring groups of
+1, 2, 4, 8 or 16 lanes.</td>
+</tr>
+<tr class="row-odd"><td>offset:swizzle(REVERSE,{2..32})</td>
+<td>Specifies a reverse mode. Reverses
+the lanes for groups of 2, 4, 8, 16 or 32 lanes.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="gds">
+<span id="amdgpu-synid-gds"></span><h4><a class="toc-backref" href="#id28">gds</a><a class="headerlink" href="#gds" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether to use GDS or LDS memory (LDS is the default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>gds</td>
+<td>Use GDS memory.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="exp-modifiers">
+<h3><a class="toc-backref" href="#id29">EXP Modifiers</a><a class="headerlink" href="#exp-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="done">
+<span id="amdgpu-synid-done"></span><h4><a class="toc-backref" href="#id30">done</a><a class="headerlink" href="#done" title="Permalink to this headline">¶</a></h4>
+<p>Specifies if this is the last export from the shader to the target. By default, the current
+instruction does not finish an export sequence.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>done</td>
+<td>Indicates the last export operation.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="compr">
+<span id="amdgpu-synid-compr"></span><h4><a class="toc-backref" href="#id31">compr</a><a class="headerlink" href="#compr" title="Permalink to this headline">¶</a></h4>
+<p>Indicates if the data are compressed (not compressed by default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>compr</td>
+<td>Data are compressed.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="vm">
+<span id="amdgpu-synid-vm"></span><h4><a class="toc-backref" href="#id32">vm</a><a class="headerlink" href="#vm" title="Permalink to this headline">¶</a></h4>
+<p>Specifies valid mask flag state (off by default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>vm</td>
+<td>Set valid mask flag.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="flat-modifiers">
+<h3><a class="toc-backref" href="#id33">FLAT Modifiers</a><a class="headerlink" href="#flat-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="flat-offset12">
+<span id="amdgpu-synid-flat-offset12"></span><h4><a class="toc-backref" href="#id34">flat_offset12</a><a class="headerlink" href="#flat-offset12" title="Permalink to this headline">¶</a></h4>
+<p>Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0.</p>
+<p>Cannot be used with <em>global/scratch</em> opcodes. GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{0..4095}</td>
+<td>Specifies a 12-bit unsigned offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="flat-offset13">
+<span id="amdgpu-synid-flat-offset13"></span><h4><a class="toc-backref" href="#id35">flat_offset13</a><a class="headerlink" href="#flat-offset13" title="Permalink to this headline">¶</a></h4>
+<p>Specifies an immediate signed 13-bit offset, in bytes. The default value is 0.</p>
+<p>Can be used with <em>global/scratch</em> opcodes only. GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{-4096..+4095}</td>
+<td>Specifies a 13-bit signed offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="glc">
+<h4><a class="toc-backref" href="#id36">glc</a><a class="headerlink" href="#glc" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-glc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="slc">
+<h4><a class="toc-backref" href="#id37">slc</a><a class="headerlink" href="#slc" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-slc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="tfe">
+<h4><a class="toc-backref" href="#id38">tfe</a><a class="headerlink" href="#tfe" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-tfe"><em>here</em></a>.</p>
+</div>
+<div class="section" id="nv">
+<h4><a class="toc-backref" href="#id39">nv</a><a class="headerlink" href="#nv" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-nv"><em>here</em></a>.</p>
+</div>
+</div>
+<div class="section" id="mimg-modifiers">
+<h3><a class="toc-backref" href="#id40">MIMG Modifiers</a><a class="headerlink" href="#mimg-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="dmask">
+<span id="amdgpu-synid-dmask"></span><h4><a class="toc-backref" href="#id41">dmask</a><a class="headerlink" href="#dmask" title="Permalink to this headline">¶</a></h4>
+<p>Specifies which channels (image components) are used by the operation. By default, no channels
+are used.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>dmask:{0..15}</td>
+<td>Each bit corresponds to one of 4 image
+components (RGBA). If the specified bit value
+is 0, the component is not used, value 1 means
+that the component is used.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>This modifier has some limitations depending on instruction kind:</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Instruction Kind</th>
+<th class="head">Valid dmask Values</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>32-bit atomic cmpswap</td>
+<td>0x3</td>
+</tr>
+<tr class="row-odd"><td>other 32-bit atomic instructions</td>
+<td>0x1</td>
+</tr>
+<tr class="row-even"><td>64-bit atomic cmpswap</td>
+<td>0xF</td>
+</tr>
+<tr class="row-odd"><td>other 64-bit atomic instructions</td>
+<td>0x3</td>
+</tr>
+<tr class="row-even"><td>GATHER4</td>
+<td>0x1, 0x2, 0x4, 0x8</td>
+</tr>
+<tr class="row-odd"><td>Other instructions</td>
+<td>any value</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="unorm">
+<span id="amdgpu-synid-unorm"></span><h4><a class="toc-backref" href="#id42">unorm</a><a class="headerlink" href="#unorm" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether address is normalized or not (normalized by default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>unorm</td>
+<td>Force address to be un-normalized.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id1">
+<h4><a class="toc-backref" href="#id43">glc</a><a class="headerlink" href="#id1" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-glc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="id2">
+<h4><a class="toc-backref" href="#id44">slc</a><a class="headerlink" href="#id2" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-slc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="r128">
+<span id="amdgpu-synid-r128"></span><h4><a class="toc-backref" href="#id45">r128</a><a class="headerlink" href="#r128" title="Permalink to this headline">¶</a></h4>
+<p>Specifies texture resource size. The default size is 256 bits.</p>
+<p>GFX7 and GFX8 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>r128</td>
+<td>Specifies a 128-bit texture resource size.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id3">
+<h4><a class="toc-backref" href="#id46">tfe</a><a class="headerlink" href="#id3" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-tfe"><em>here</em></a>.</p>
+</div>
+<div class="section" id="lwe">
+<span id="amdgpu-synid-lwe"></span><h4><a class="toc-backref" href="#id47">lwe</a><a class="headerlink" href="#lwe" title="Permalink to this headline">¶</a></h4>
+<p>Specifies LOD warning status (LOD warning is disabled by default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>lwe</td>
+<td>Enables LOD warning.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="da">
+<span id="amdgpu-synid-da"></span><h4><a class="toc-backref" href="#id48">da</a><a class="headerlink" href="#da" title="Permalink to this headline">¶</a></h4>
+<p>Specifies if an array index must be sent to TA. By default, array index is not sent.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>da</td>
+<td>Send an array-index to TA.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="d16">
+<span id="amdgpu-synid-d16"></span><h4><a class="toc-backref" href="#id49">d16</a><a class="headerlink" href="#d16" title="Permalink to this headline">¶</a></h4>
+<p>Specifies data size: 16 or 32 bits (32 bits by default). Not supported by GFX7.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>d16</td>
+<td><p class="first">Enables 16-bit data mode.</p>
+<p>On loads, convert data in memory to 16-bit
+format before storing it in VGPRs.</p>
+<p>For stores, convert 16-bit data in VGPRs to
+32 bits before going to memory.</p>
+<p class="last">Note that 16-bit data are stored in VGPRs
+unpacked in GFX8.0. In GFX8.1 and GFX9 16-bit
+data are packed.</p>
+</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="a16">
+<span id="amdgpu-synid-a16"></span><h4><a class="toc-backref" href="#id50">a16</a><a class="headerlink" href="#a16" title="Permalink to this headline">¶</a></h4>
+<p>Specifies size of image address components: 16 or 32 bits (32 bits by default). GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>a16</td>
+<td>Enables 16-bit image address components.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="miscellaneous-modifiers">
+<h3><a class="toc-backref" href="#id51">Miscellaneous Modifiers</a><a class="headerlink" href="#miscellaneous-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="amdgpu-synid-glc">
+<span id="id4"></span><h4><a class="toc-backref" href="#id52">glc</a><a class="headerlink" href="#amdgpu-synid-glc" title="Permalink to this headline">¶</a></h4>
+<p>This modifier has different meaning for loads, stores, and atomic operations.
+The default value is off (0).</p>
+<p>See AMD documentation for details.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>glc</td>
+<td>Set glc bit to 1.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-slc">
+<span id="id5"></span><h4><a class="toc-backref" href="#id53">slc</a><a class="headerlink" href="#amdgpu-synid-slc" title="Permalink to this headline">¶</a></h4>
+<p>Specifies cache policy. The default value is off (0).</p>
+<p>See AMD documentation for details.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>slc</td>
+<td>Set slc bit to 1.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-tfe">
+<span id="id6"></span><h4><a class="toc-backref" href="#id54">tfe</a><a class="headerlink" href="#amdgpu-synid-tfe" title="Permalink to this headline">¶</a></h4>
+<p>Controls access to partially resident textures. The default value is off (0).</p>
+<p>See AMD documentation for details.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>tfe</td>
+<td>Set tfe bit to 1.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-nv">
+<span id="id7"></span><h4><a class="toc-backref" href="#id55">nv</a><a class="headerlink" href="#amdgpu-synid-nv" title="Permalink to this headline">¶</a></h4>
+<p>Specifies if instruction is operating on non-volatile memory. By default, memory is volatile.</p>
+<p>GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>nv</td>
+<td>Indicates that instruction operates on
+non-volatile memory.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="mubuf-mtbuf-modifiers">
+<h3><a class="toc-backref" href="#id56">MUBUF/MTBUF Modifiers</a><a class="headerlink" href="#mubuf-mtbuf-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="amdgpu-synid-idxen">
+<span id="idxen"></span><h4><a class="toc-backref" href="#id57">idxen</a><a class="headerlink" href="#amdgpu-synid-idxen" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether address components include an index. By default, no components are used.</p>
+<p>Can be used together with <a class="reference internal" href="#amdgpu-synid-offen"><em>offen</em></a>.</p>
+<p>Cannot be used with <a class="reference internal" href="#amdgpu-synid-addr64"><em>addr64</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>idxen</td>
+<td>Address components include an index.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="offen">
+<span id="amdgpu-synid-offen"></span><h4><a class="toc-backref" href="#id58">offen</a><a class="headerlink" href="#offen" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether address components include an offset. By default, no components are used.</p>
+<p>Can be used together with <a class="reference internal" href="#amdgpu-synid-idxen"><em>idxen</em></a>.</p>
+<p>Cannot be used with <a class="reference internal" href="#amdgpu-synid-addr64"><em>addr64</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offen</td>
+<td>Address components include an offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="addr64">
+<span id="amdgpu-synid-addr64"></span><h4><a class="toc-backref" href="#id59">addr64</a><a class="headerlink" href="#addr64" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether a 64-bit address is used. By default, no address is used.</p>
+<p>GFX7 only. Cannot be used with <a class="reference internal" href="#amdgpu-synid-offen"><em>offen</em></a> and
+<a class="reference internal" href="#amdgpu-synid-idxen"><em>idxen</em></a> modifiers.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>addr64</td>
+<td>A 64-bit address is used.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="buf-offset12">
+<span id="amdgpu-synid-buf-offset12"></span><h4><a class="toc-backref" href="#id60">buf_offset12</a><a class="headerlink" href="#buf-offset12" title="Permalink to this headline">¶</a></h4>
+<p>Specifies an immediate unsigned 12-bit offset, in bytes. The default value is 0.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>offset:{0..0xFFF}</td>
+<td>Specifies a 12-bit unsigned offset.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id8">
+<h4><a class="toc-backref" href="#id61">glc</a><a class="headerlink" href="#id8" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-glc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="id9">
+<h4><a class="toc-backref" href="#id62">slc</a><a class="headerlink" href="#id9" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-slc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="lds">
+<span id="amdgpu-synid-lds"></span><h4><a class="toc-backref" href="#id63">lds</a><a class="headerlink" href="#lds" title="Permalink to this headline">¶</a></h4>
+<p>Specifies where to store the result: VGPRs or LDS (VGPRs by default).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>lds</td>
+<td>Store result in LDS.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id10">
+<h4><a class="toc-backref" href="#id64">tfe</a><a class="headerlink" href="#id10" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-tfe"><em>here</em></a>.</p>
+</div>
+<div class="section" id="dfmt">
+<span id="amdgpu-synid-dfmt"></span><h4><a class="toc-backref" href="#id65">dfmt</a><a class="headerlink" href="#dfmt" title="Permalink to this headline">¶</a></h4>
+<p>TBD</p>
+</div>
+<div class="section" id="nfmt">
+<span id="amdgpu-synid-nfmt"></span><h4><a class="toc-backref" href="#id66">nfmt</a><a class="headerlink" href="#nfmt" title="Permalink to this headline">¶</a></h4>
+<p>TBD</p>
+</div>
+</div>
+<div class="section" id="smrd-smem-modifiers">
+<h3><a class="toc-backref" href="#id67">SMRD/SMEM Modifiers</a><a class="headerlink" href="#smrd-smem-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="id11">
+<h4><a class="toc-backref" href="#id68">glc</a><a class="headerlink" href="#id11" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-glc"><em>here</em></a>.</p>
+</div>
+<div class="section" id="id12">
+<h4><a class="toc-backref" href="#id69">nv</a><a class="headerlink" href="#id12" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-nv"><em>here</em></a>.</p>
+</div>
+</div>
+<div class="section" id="vintrp-modifiers">
+<h3><a class="toc-backref" href="#id70">VINTRP Modifiers</a><a class="headerlink" href="#vintrp-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="high">
+<span id="amdgpu-synid-high"></span><h4><a class="toc-backref" href="#id71">high</a><a class="headerlink" href="#high" title="Permalink to this headline">¶</a></h4>
+<p>Specifies which half of the LDS word to use. Low half of LDS word is used by default.
+GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>high</td>
+<td>Use high half of LDS word.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop1-vop2-dpp-modifiers">
+<h3><a class="toc-backref" href="#id72">VOP1/VOP2 DPP Modifiers</a><a class="headerlink" href="#vop1-vop2-dpp-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>GFX8 and GFX9 only.</p>
+<div class="section" id="dpp-ctrl">
+<span id="amdgpu-synid-dpp-ctrl"></span><h4><a class="toc-backref" href="#id73">dpp_ctrl</a><a class="headerlink" href="#dpp-ctrl" title="Permalink to this headline">¶</a></h4>
+<p>Specifies how data are shared between threads. This is a mandatory modifier.
+There is no default value.</p>
+<p>Note. The lanes of a wavefront are organized in four banks and four rows.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>quad_perm:[{0..3},{0..3},{0..3},{0..3}]</td>
+<td>Full permute of 4 threads.</td>
+</tr>
+<tr class="row-odd"><td>row_mirror</td>
+<td>Mirror threads within row.</td>
+</tr>
+<tr class="row-even"><td>row_half_mirror</td>
+<td>Mirror threads within 1/2 row (8 threads).</td>
+</tr>
+<tr class="row-odd"><td>row_bcast:15</td>
+<td>Broadcast 15th thread of each row to next row.</td>
+</tr>
+<tr class="row-even"><td>row_bcast:31</td>
+<td>Broadcast thread 31 to rows 2 and 3.</td>
+</tr>
+<tr class="row-odd"><td>wave_shl:1</td>
+<td>Wavefront left shift by 1 thread.</td>
+</tr>
+<tr class="row-even"><td>wave_rol:1</td>
+<td>Wavefront left rotate by 1 thread.</td>
+</tr>
+<tr class="row-odd"><td>wave_shr:1</td>
+<td>Wavefront right shift by 1 thread.</td>
+</tr>
+<tr class="row-even"><td>wave_ror:1</td>
+<td>Wavefront right rotate by 1 thread.</td>
+</tr>
+<tr class="row-odd"><td>row_shl:{1..15}</td>
+<td>Row shift left by 1-15 threads.</td>
+</tr>
+<tr class="row-even"><td>row_shr:{1..15}</td>
+<td>Row shift right by 1-15 threads.</td>
+</tr>
+<tr class="row-odd"><td>row_ror:{1..15}</td>
+<td>Row rotate right by 1-15 threads.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="row-mask">
+<span id="amdgpu-synid-row-mask"></span><h4><a class="toc-backref" href="#id74">row_mask</a><a class="headerlink" href="#row-mask" title="Permalink to this headline">¶</a></h4>
+<p>Controls which rows are enabled for data sharing. By default, all rows are enabled.</p>
+<p>Note. The lanes of a wavefront are organized in four banks and four rows.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>row_mask:{0..15}</td>
+<td>Each of 4 bits in the mask controls one
+row (0 - disabled, 1 - enabled).</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="bank-mask">
+<span id="amdgpu-synid-bank-mask"></span><h4><a class="toc-backref" href="#id75">bank_mask</a><a class="headerlink" href="#bank-mask" title="Permalink to this headline">¶</a></h4>
+<p>Controls which banks are enabled for data sharing. By default, all banks are enabled.</p>
+<p>Note. The lanes of a wavefront are organized in four banks and four rows.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>bank_mask:{0..15}</td>
+<td>Each of 4 bits in the mask controls one
+bank (0 - disabled, 1 - enabled).</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="bound-ctrl">
+<span id="amdgpu-synid-bound-ctrl"></span><h4><a class="toc-backref" href="#id76">bound_ctrl</a><a class="headerlink" href="#bound-ctrl" title="Permalink to this headline">¶</a></h4>
+<p>Controls data sharing when accessing an invalid lane. By default, data sharing with
+invalid lanes is disabled.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>bound_ctrl:0</td>
+<td>Enables data sharing with invalid lanes.
+Accessing data from an invalid lane will
+return zero.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop1-vop2-vopc-sdwa-modifiers">
+<h3><a class="toc-backref" href="#id77">VOP1/VOP2/VOPC SDWA Modifiers</a><a class="headerlink" href="#vop1-vop2-vopc-sdwa-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>GFX8 and GFX9 only.</p>
+<div class="section" id="clamp">
+<h4><a class="toc-backref" href="#id78">clamp</a><a class="headerlink" href="#clamp" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-clamp"><em>here</em></a>.</p>
+</div>
+<div class="section" id="omod">
+<h4><a class="toc-backref" href="#id79">omod</a><a class="headerlink" href="#omod" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-omod"><em>here</em></a>.</p>
+<p>GFX9 only.</p>
+</div>
+<div class="section" id="dst-sel">
+<span id="amdgpu-synid-dst-sel"></span><h4><a class="toc-backref" href="#id80">dst_sel</a><a class="headerlink" href="#dst-sel" title="Permalink to this headline">¶</a></h4>
+<p>Selects which bits in the destination are affected. By default, all bits are affected.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>dst_sel:DWORD</td>
+<td>Use bits 31:0.</td>
+</tr>
+<tr class="row-odd"><td>dst_sel:BYTE_0</td>
+<td>Use bits 7:0.</td>
+</tr>
+<tr class="row-even"><td>dst_sel:BYTE_1</td>
+<td>Use bits 15:8.</td>
+</tr>
+<tr class="row-odd"><td>dst_sel:BYTE_2</td>
+<td>Use bits 23:16.</td>
+</tr>
+<tr class="row-even"><td>dst_sel:BYTE_3</td>
+<td>Use bits 31:24.</td>
+</tr>
+<tr class="row-odd"><td>dst_sel:WORD_0</td>
+<td>Use bits 15:0.</td>
+</tr>
+<tr class="row-even"><td>dst_sel:WORD_1</td>
+<td>Use bits 31:16.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="dst-unused">
+<span id="amdgpu-synid-dst-unused"></span><h4><a class="toc-backref" href="#id81">dst_unused</a><a class="headerlink" href="#dst-unused" title="Permalink to this headline">¶</a></h4>
+<p>Controls what to do with the bits in the destination which are not selected
+by <a class="reference internal" href="#amdgpu-synid-dst-sel"><em>dst_sel</em></a>.
+By default, unused bits are preserved.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>dst_unused:UNUSED_PAD</td>
+<td>Pad with zeros.</td>
+</tr>
+<tr class="row-odd"><td>dst_unused:UNUSED_SEXT</td>
+<td>Sign-extend upper bits, zero lower bits.</td>
+</tr>
+<tr class="row-even"><td>dst_unused:UNUSED_PRESERVE</td>
+<td>Preserve bits.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="src0-sel">
+<span id="amdgpu-synid-src0-sel"></span><h4><a class="toc-backref" href="#id82">src0_sel</a><a class="headerlink" href="#src0-sel" title="Permalink to this headline">¶</a></h4>
+<p>Controls which bits in the src0 are used. By default, all bits are used.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>src0_sel:DWORD</td>
+<td>Use bits 31:0.</td>
+</tr>
+<tr class="row-odd"><td>src0_sel:BYTE_0</td>
+<td>Use bits 7:0.</td>
+</tr>
+<tr class="row-even"><td>src0_sel:BYTE_1</td>
+<td>Use bits 15:8.</td>
+</tr>
+<tr class="row-odd"><td>src0_sel:BYTE_2</td>
+<td>Use bits 23:16.</td>
+</tr>
+<tr class="row-even"><td>src0_sel:BYTE_3</td>
+<td>Use bits 31:24.</td>
+</tr>
+<tr class="row-odd"><td>src0_sel:WORD_0</td>
+<td>Use bits 15:0.</td>
+</tr>
+<tr class="row-even"><td>src0_sel:WORD_1</td>
+<td>Use bits 31:16.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="src1-sel">
+<span id="amdgpu-synid-src1-sel"></span><h4><a class="toc-backref" href="#id83">src1_sel</a><a class="headerlink" href="#src1-sel" title="Permalink to this headline">¶</a></h4>
+<p>Controls which bits in the src1 are used. By default, all bits are used.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>src1_sel:DWORD</td>
+<td>Use bits 31:0.</td>
+</tr>
+<tr class="row-odd"><td>src1_sel:BYTE_0</td>
+<td>Use bits 7:0.</td>
+</tr>
+<tr class="row-even"><td>src1_sel:BYTE_1</td>
+<td>Use bits 15:8.</td>
+</tr>
+<tr class="row-odd"><td>src1_sel:BYTE_2</td>
+<td>Use bits 23:16.</td>
+</tr>
+<tr class="row-even"><td>src1_sel:BYTE_3</td>
+<td>Use bits 31:24.</td>
+</tr>
+<tr class="row-odd"><td>src1_sel:WORD_0</td>
+<td>Use bits 15:0.</td>
+</tr>
+<tr class="row-even"><td>src1_sel:WORD_1</td>
+<td>Use bits 31:16.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop1-vop2-vopc-sdwa-operand-modifiers">
+<h3><a class="toc-backref" href="#id84">VOP1/VOP2/VOPC SDWA Operand Modifiers</a><a class="headerlink" href="#vop1-vop2-vopc-sdwa-operand-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>Operand modifiers are not used separately. They are applied to source operands.</p>
+<p>GFX8 and GFX9 only.</p>
+<div class="section" id="abs">
+<h4><a class="toc-backref" href="#id85">abs</a><a class="headerlink" href="#abs" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-abs"><em>here</em></a>.</p>
+</div>
+<div class="section" id="neg">
+<h4><a class="toc-backref" href="#id86">neg</a><a class="headerlink" href="#neg" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-neg"><em>here</em></a>.</p>
+</div>
+<div class="section" id="sext">
+<span id="amdgpu-synid-sext"></span><h4><a class="toc-backref" href="#id87">sext</a><a class="headerlink" href="#sext" title="Permalink to this headline">¶</a></h4>
+<p>Sign-extends value of a (sub-dword) operand to fill all 32 bits.
+Has no effect for 32-bit operands.</p>
+<p>Valid for integer operands only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>sext(&lt;operand&gt;)</td>
+<td>Sign-extend operand value.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop3-modifiers">
+<h3><a class="toc-backref" href="#id88">VOP3 Modifiers</a><a class="headerlink" href="#vop3-modifiers" title="Permalink to this headline">¶</a></h3>
+<div class="section" id="vop3-op-sel">
+<span id="amdgpu-synid-vop3-op-sel"></span><h4><a class="toc-backref" href="#id89">vop3_op_sel</a><a class="headerlink" href="#vop3-op-sel" title="Permalink to this headline">¶</a></h4>
+<p>Selects the low [15:0] or high [31:16] operand bits for source and destination operands.
+By default, low bits are used for all operands.</p>
+<p>The number of values specified with the op_sel modifier must match the number of instruction
+operands (both source and destination). First value controls src0, second value controls src1
+and so on, except that the last value controls destination.
+The value 0 selects the low bits, while 1 selects the high bits.</p>
+<p>Note. op_sel modifier affects 16-bit operands only. For 32-bit operands the value specified
+by op_sel must be 0.</p>
+<p>GFX9 only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>op_sel:[{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 1 source operand.</td>
+</tr>
+<tr class="row-odd"><td>op_sel:[{0..1},{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 2 source operands.</td>
+</tr>
+<tr class="row-even"><td>op_sel:[{0..1},{0..1},{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 3 source operands.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-clamp">
+<span id="id13"></span><h4><a class="toc-backref" href="#id90">clamp</a><a class="headerlink" href="#amdgpu-synid-clamp" title="Permalink to this headline">¶</a></h4>
+<p>Clamp meaning depends on instruction.</p>
+<p>For <em>v_cmp</em> instructions, clamp modifier indicates that the compare signals
+if a floating point exception occurs. By default, signaling is disabled.
+Not supported by GFX7.</p>
+<p>For integer operations, clamp modifier indicates that the result must be clamped
+to the largest and smallest representable value. By default, there is no clamping.
+Integer clamping is not supported by GFX7.</p>
+<p>For floating point operations, clamp modifier indicates that the result must be clamped
+to the range [0.0, 1.0]. By default, there is no clamping.</p>
+<p>Note. Clamp modifier is applied after <a class="reference internal" href="#amdgpu-synid-omod"><em>output modifiers</em></a> (if any).</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>clamp</td>
+<td>Enables clamping (or signaling).</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-omod">
+<span id="id14"></span><h4><a class="toc-backref" href="#id91">omod</a><a class="headerlink" href="#amdgpu-synid-omod" title="Permalink to this headline">¶</a></h4>
+<p>Specifies if an output modifier must be applied to the result.
+By default, no output modifiers are applied.</p>
+<p>Note. Output modifiers are applied before <a class="reference internal" href="#amdgpu-synid-clamp"><em>clamping</em></a> (if any).</p>
+<p>Output modifiers are valid for f32 and f64 floating point results only.
+They must not be used with f16.</p>
+<p>Note. <em>v_cvt_f16_f32</em> is an exception. This instruction produces f16 result
+but accepts output modifiers.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>mul:2</td>
+<td>Multiply the result by 2.</td>
+</tr>
+<tr class="row-odd"><td>mul:4</td>
+<td>Multiply the result by 4.</td>
+</tr>
+<tr class="row-even"><td>div:2</td>
+<td>Multiply the result by 0.5.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop3-operand-modifiers">
+<h3><a class="toc-backref" href="#id92">VOP3 Operand Modifiers</a><a class="headerlink" href="#vop3-operand-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>Operand modifiers are not used separately. They are applied to source operands.</p>
+<div class="section" id="amdgpu-synid-abs">
+<span id="id15"></span><h4><a class="toc-backref" href="#id93">abs</a><a class="headerlink" href="#amdgpu-synid-abs" title="Permalink to this headline">¶</a></h4>
+<p>Computes absolute value of its operand. Applied before <a class="reference internal" href="#amdgpu-synid-neg"><em>neg</em></a> (if any).
+Valid for floating point operands only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>abs(&lt;operand&gt;)</td>
+<td>Get absolute value of operand.</td>
+</tr>
+<tr class="row-odd"><td>|&lt;operand&gt;|</td>
+<td>The same as above.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-synid-neg">
+<span id="id16"></span><h4><a class="toc-backref" href="#id94">neg</a><a class="headerlink" href="#amdgpu-synid-neg" title="Permalink to this headline">¶</a></h4>
+<p>Computes negative value of its operand. Applied after <a class="reference internal" href="#amdgpu-synid-abs"><em>abs</em></a> (if any).
+Valid for floating point operands only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>neg(&lt;operand&gt;)</td>
+<td>Get negative value of operand.</td>
+</tr>
+<tr class="row-odd"><td>-&lt;operand&gt;</td>
+<td>The same as above.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="vop3p-modifiers">
+<h3><a class="toc-backref" href="#id95">VOP3P Modifiers</a><a class="headerlink" href="#vop3p-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>This section describes modifiers of regular VOP3P instructions.
+<em>v_mad_mix</em> modifiers are described <a class="reference internal" href="#amdgpu-synid-mad-mix"><em>in a separate section</em></a>.</p>
+<p>GFX9 only.</p>
+<div class="section" id="op-sel">
+<span id="amdgpu-synid-op-sel"></span><h4><a class="toc-backref" href="#id96">op_sel</a><a class="headerlink" href="#op-sel" title="Permalink to this headline">¶</a></h4>
+<p>Selects the low [15:0] or high [31:16] operand bits as input to the operation
+which results in the lower-half of the destination.
+By default, low bits are used for all operands.</p>
+<p>The number of values specified with the op_sel modifier must match the number of source
+operands. First value controls src0, second value controls src1 and so on.
+The value 0 selects the low bits, while 1 selects the high bits.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>op_sel:[{0..1}]</td>
+<td>Select operand bits for instructions with 1 source operand.</td>
+</tr>
+<tr class="row-odd"><td>op_sel:[{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 2 source operands.</td>
+</tr>
+<tr class="row-even"><td>op_sel:[{0..1},{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 3 source operands.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="op-sel-hi">
+<span id="amdgpu-synid-op-sel-hi"></span><h4><a class="toc-backref" href="#id97">op_sel_hi</a><a class="headerlink" href="#op-sel-hi" title="Permalink to this headline">¶</a></h4>
+<p>Selects the low [15:0] or high [31:16] operand bits as input to the operation
+which results in the upper-half of the destination.
+By default, high bits are used for all operands.</p>
+<p>The number of values specified with the op_sel_hi modifier must match the number of source
+operands. First value controls src0, second value controls src1 and so on.
+The value 0 selects the low bits, while 1 selects the high bits.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="40%" />
+<col width="60%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>op_sel_hi:[{0..1}]</td>
+<td>Select operand bits for instructions with 1 source operand.</td>
+</tr>
+<tr class="row-odd"><td>op_sel_hi:[{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 2 source operands.</td>
+</tr>
+<tr class="row-even"><td>op_sel_hi:[{0..1},{0..1},{0..1}]</td>
+<td>Select operand bits for instructions with 3 source operands.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="neg-lo">
+<span id="amdgpu-synid-neg-lo"></span><h4><a class="toc-backref" href="#id98">neg_lo</a><a class="headerlink" href="#neg-lo" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether to change sign of operand values selected by
+<a class="reference internal" href="#amdgpu-synid-op-sel"><em>op_sel</em></a>. These values are then used
+as input to the operation which results in the lower-half of the destination.</p>
+<p>The number of values specified with this modifier must match the number of source
+operands. First value controls src0, second value controls src1 and so on.</p>
+<p>The value 0 indicates that the corresponding operand value is used unmodified,
+the value 1 indicates that negative value of the operand must be used.</p>
+<p>By default, operand values are used unmodified.</p>
+<p>This modifier is valid for floating point operands only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="38%" />
+<col width="62%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>neg_lo:[{0..1}]</td>
+<td>Select affected operands for instructions with 1 source operand.</td>
+</tr>
+<tr class="row-odd"><td>neg_lo:[{0..1},{0..1}]</td>
+<td>Select affected operands for instructions with 2 source operands.</td>
+</tr>
+<tr class="row-even"><td>neg_lo:[{0..1},{0..1},{0..1}]</td>
+<td>Select affected operands for instructions with 3 source operands.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="neg-hi">
+<span id="amdgpu-synid-neg-hi"></span><h4><a class="toc-backref" href="#id99">neg_hi</a><a class="headerlink" href="#neg-hi" title="Permalink to this headline">¶</a></h4>
+<p>Specifies whether to change sign of operand values selected by
+<a class="reference internal" href="#amdgpu-synid-op-sel-hi"><em>op_sel_hi</em></a>. These values are then used
+as input to the operation which results in the upper-half of the destination.</p>
+<p>The number of values specified with this modifier must match the number of source
+operands. First value controls src0, second value controls src1 and so on.</p>
+<p>The value 0 indicates that the corresponding operand value is used unmodified,
+the value 1 indicates that negative value of the operand must be used.</p>
+<p>By default, operand values are used unmodified.</p>
+<p>This modifier is valid for floating point operands only.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="38%" />
+<col width="62%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>neg_hi:[{0..1}]</td>
+<td>Select affected operands for instructions with 1 source operand.</td>
+</tr>
+<tr class="row-odd"><td>neg_hi:[{0..1},{0..1}]</td>
+<td>Select affected operands for instructions with 2 source operands.</td>
+</tr>
+<tr class="row-even"><td>neg_hi:[{0..1},{0..1},{0..1}]</td>
+<td>Select affected operands for instructions with 3 source operands.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id17">
+<h4><a class="toc-backref" href="#id100">clamp</a><a class="headerlink" href="#id17" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-clamp"><em>here</em></a>.</p>
+</div>
+</div>
+<div class="section" id="vop3p-v-mad-mix-modifiers">
+<span id="amdgpu-synid-mad-mix"></span><h3><a class="toc-backref" href="#id101">VOP3P V_MAD_MIX Modifiers</a><a class="headerlink" href="#vop3p-v-mad-mix-modifiers" title="Permalink to this headline">¶</a></h3>
+<p>These instructions use VOP3P format but have different modifiers.</p>
+<p>GFX9 only.</p>
+<div class="section" id="mad-mix-op-sel">
+<span id="amdgpu-synid-mad-mix-op-sel"></span><h4><a class="toc-backref" href="#id102">mad_mix_op_sel</a><a class="headerlink" href="#mad-mix-op-sel" title="Permalink to this headline">¶</a></h4>
+<p>This modifier has meaning only for 16-bit source operands, as indicated by
+<a class="reference internal" href="#amdgpu-synid-mad-mix-op-sel-hi"><em>mad_mix_op_sel_hi</em></a>.
+It selects either the low [15:0] or high [31:16] operand bits
+as input to the operation.</p>
+<p>The value 0 indicates the low bits, the value 1 indicates the high 16 bits.
+By default, low bits are used for all operands.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>op_sel:[{0..1},{0..1},{0..1}]</td>
+<td>Select location of each 16-bit source operand.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="mad-mix-op-sel-hi">
+<span id="amdgpu-synid-mad-mix-op-sel-hi"></span><h4><a class="toc-backref" href="#id103">mad_mix_op_sel_hi</a><a class="headerlink" href="#mad-mix-op-sel-hi" title="Permalink to this headline">¶</a></h4>
+<p>Selects the size of source operands: either 32 bits or 16 bits.
+By default, 32 bits are used for all source operands.</p>
+<p>The value 0 indicates 32 bits, the value 1 indicates 16 bits.
+The location of 16 bits in the operand may be specified by
+<a class="reference internal" href="#amdgpu-synid-mad-mix-op-sel"><em>mad_mix_op_sel</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils">
+<colgroup>
+<col width="45%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Syntax</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>op_sel_hi:[{0..1},{0..1},{0..1}]</td>
+<td>Select size of each source operand.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="id18">
+<h4><a class="toc-backref" href="#id104">abs</a><a class="headerlink" href="#id18" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-abs"><em>here</em></a>.</p>
+</div>
+<div class="section" id="id19">
+<h4><a class="toc-backref" href="#id105">neg</a><a class="headerlink" href="#id19" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-neg"><em>here</em></a>.</p>
+</div>
+<div class="section" id="id20">
+<h4><a class="toc-backref" href="#id106">clamp</a><a class="headerlink" href="#id20" title="Permalink to this headline">¶</a></h4>
+<p>See a description <a class="reference internal" href="#amdgpu-synid-clamp"><em>here</em></a>.</p>
+</div>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="AMDGPUOperandSyntax.html" title="Syntax of AMDGPU Assembler Operands and Modifiers"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX8.html" title="Syntax of GFX8 Instructions"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ <li><a href="AMDGPUUsage.html" >User Guide for AMDGPU Backend</a> »</li>
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/AMDGPUUsage.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/AMDGPUUsage.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/AMDGPUUsage.html (added)
+++ www-releases/trunk/7.0.1/docs/AMDGPUUsage.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,7587 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>User Guide for AMDGPU Backend — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="Syntax of GFX7 Instructions" href="AMDGPUAsmGFX7.html" />
+ <link rel="prev" title="User Guide for NVPTX Back-end" href="NVPTXUsage.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX7.html" title="Syntax of GFX7 Instructions"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="NVPTXUsage.html" title="User Guide for NVPTX Back-end"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="user-guide-for-amdgpu-backend">
+<h1>User Guide for AMDGPU Backend<a class="headerlink" href="#user-guide-for-amdgpu-backend" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#introduction" id="id42">Introduction</a></li>
+<li><a class="reference internal" href="#llvm" id="id43">LLVM</a><ul>
+<li><a class="reference internal" href="#target-triples" id="id44">Target Triples</a></li>
+<li><a class="reference internal" href="#processors" id="id45">Processors</a></li>
+<li><a class="reference internal" href="#target-features" id="id46">Target Features</a></li>
+<li><a class="reference internal" href="#address-spaces" id="id47">Address Spaces</a></li>
+<li><a class="reference internal" href="#memory-scopes" id="id48">Memory Scopes</a></li>
+<li><a class="reference internal" href="#amdgpu-intrinsics" id="id49">AMDGPU Intrinsics</a></li>
+<li><a class="reference internal" href="#amdgpu-attributes" id="id50">AMDGPU Attributes</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#code-object" id="id51">Code Object</a><ul>
+<li><a class="reference internal" href="#header" id="id52">Header</a></li>
+<li><a class="reference internal" href="#sections" id="id53">Sections</a></li>
+<li><a class="reference internal" href="#note-records" id="id54">Note Records</a></li>
+<li><a class="reference internal" href="#symbols" id="id55">Symbols</a></li>
+<li><a class="reference internal" href="#relocation-records" id="id56">Relocation Records</a></li>
+<li><a class="reference internal" href="#dwarf" id="id57">DWARF</a><ul>
+<li><a class="reference internal" href="#address-space-mapping" id="id58">Address Space Mapping</a></li>
+<li><a class="reference internal" href="#register-mapping" id="id59">Register Mapping</a></li>
+<li><a class="reference internal" href="#source-text" id="id60">Source Text</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#code-conventions" id="id61">Code Conventions</a><ul>
+<li><a class="reference internal" href="#amdhsa" id="id62">AMDHSA</a><ul>
+<li><a class="reference internal" href="#code-object-target-identification" id="id63">Code Object Target Identification</a></li>
+<li><a class="reference internal" href="#code-object-metadata" id="id64">Code Object Metadata</a></li>
+<li><a class="reference internal" href="#kernel-dispatch" id="id65">Kernel Dispatch</a></li>
+<li><a class="reference internal" href="#memory-spaces" id="id66">Memory Spaces</a></li>
+<li><a class="reference internal" href="#image-and-samplers" id="id67">Image and Samplers</a></li>
+<li><a class="reference internal" href="#hsa-signals" id="id68">HSA Signals</a></li>
+<li><a class="reference internal" href="#hsa-aql-queue" id="id69">HSA AQL Queue</a></li>
+<li><a class="reference internal" href="#kernel-descriptor" id="id70">Kernel Descriptor</a><ul>
+<li><a class="reference internal" href="#kernel-descriptor-for-gfx6-gfx9" id="id71">Kernel Descriptor for GFX6-GFX9</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#initial-kernel-execution-state" id="id72">Initial Kernel Execution State</a></li>
+<li><a class="reference internal" href="#kernel-prolog" id="id73">Kernel Prolog</a><ul>
+<li><a class="reference internal" href="#m0" id="id74">M0</a></li>
+<li><a class="reference internal" href="#flat-scratch" id="id75">Flat Scratch</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#memory-model" id="id76">Memory Model</a></li>
+<li><a class="reference internal" href="#trap-handler-abi" id="id77">Trap Handler ABI</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#amdpal" id="id78">AMDPAL</a><ul>
+<li><a class="reference internal" href="#user-data" id="id79">User Data</a></li>
+<li><a class="reference internal" href="#compute-user-data" id="id80">Compute User Data</a></li>
+<li><a class="reference internal" href="#graphics-user-data" id="id81">Graphics User Data</a></li>
+<li><a class="reference internal" href="#global-internal-table" id="id82">Global Internal Table</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#unspecified-os" id="id83">Unspecified OS</a><ul>
+<li><a class="reference internal" href="#id35" id="id84">Trap Handler ABI</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#source-languages" id="id85">Source Languages</a><ul>
+<li><a class="reference internal" href="#opencl" id="id86">OpenCL</a></li>
+<li><a class="reference internal" href="#hcc" id="id87">HCC</a></li>
+<li><a class="reference internal" href="#assembler" id="id88">Assembler</a><ul>
+<li><a class="reference internal" href="#instructions" id="id89">Instructions</a></li>
+<li><a class="reference internal" href="#operands" id="id90">Operands</a></li>
+<li><a class="reference internal" href="#modifiers" id="id91">Modifiers</a></li>
+<li><a class="reference internal" href="#instruction-examples" id="id92">Instruction Examples</a><ul>
+<li><a class="reference internal" href="#ds" id="id93">DS</a></li>
+<li><a class="reference internal" href="#flat" id="id94">FLAT</a></li>
+<li><a class="reference internal" href="#mubuf" id="id95">MUBUF</a></li>
+<li><a class="reference internal" href="#smrd-smem" id="id96">SMRD/SMEM</a></li>
+<li><a class="reference internal" href="#sop1" id="id97">SOP1</a></li>
+<li><a class="reference internal" href="#sop2" id="id98">SOP2</a></li>
+<li><a class="reference internal" href="#sopc" id="id99">SOPC</a></li>
+<li><a class="reference internal" href="#sopp" id="id100">SOPP</a></li>
+<li><a class="reference internal" href="#valu" id="id101">VALU</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#hsa-code-object-directives" id="id102">HSA Code Object Directives</a><ul>
+<li><a class="reference internal" href="#hsa-code-object-version-major-minor" id="id103">.hsa_code_object_version major, minor</a></li>
+<li><a class="reference internal" href="#hsa-code-object-isa-major-minor-stepping-vendor-arch" id="id104">.hsa_code_object_isa [major, minor, stepping, vendor, arch]</a></li>
+<li><a class="reference internal" href="#amdgpu-hsa-kernel-name" id="id105">.amdgpu_hsa_kernel (name)</a></li>
+<li><a class="reference internal" href="#amd-kernel-code-t" id="id106">.amd_kernel_code_t</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#predefined-symbols-mattr-code-object-v3" id="id107">Predefined Symbols (-mattr=+code-object-v3)</a><ul>
+<li><a class="reference internal" href="#amdgcn-gfx-generation-number" id="id108">.amdgcn.gfx_generation_number</a></li>
+<li><a class="reference internal" href="#amdgcn-next-free-vgpr" id="id109">.amdgcn.next_free_vgpr</a></li>
+<li><a class="reference internal" href="#amdgcn-next-free-sgpr" id="id110">.amdgcn.next_free_sgpr</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#code-object-directives-mattr-code-object-v3" id="id111">Code Object Directives (-mattr=+code-object-v3)</a><ul>
+<li><a class="reference internal" href="#amdgcn-target-target" id="id112">.amdgcn_target &lt;target&gt;</a></li>
+<li><a class="reference internal" href="#amdhsa-kernel-name" id="id113">.amdhsa_kernel &lt;name&gt;</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#example-hsa-source-code-mattr-code-object-v3" id="id114">Example HSA Source Code (-mattr=+code-object-v3)</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#additional-documentation" id="id115">Additional Documentation</a></li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2><a class="toc-backref" href="#id42">Introduction</a><a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p>The AMDGPU backend provides ISA code generation for AMD GPUs, starting with the
+R600 family up until the current GCN families. It lives in the
+<tt class="docutils literal"><span class="pre">lib/Target/AMDGPU</span></tt> directory.</p>
+</div>
+<div class="section" id="llvm">
+<h2><a class="toc-backref" href="#id43">LLVM</a><a class="headerlink" href="#llvm" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="target-triples">
+<span id="amdgpu-target-triples"></span><h3><a class="toc-backref" href="#id44">Target Triples</a><a class="headerlink" href="#target-triples" title="Permalink to this headline">¶</a></h3>
+<p>Use the <tt class="docutils literal"><span class="pre">clang</span> <span class="pre">-target</span> <span class="pre">&lt;Architecture&gt;-&lt;Vendor&gt;-&lt;OS&gt;-&lt;Environment&gt;</span></tt> option to
+specify the target triple:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-architecture-table">
+<caption>AMDGPU Architectures</caption>
+<colgroup>
+<col width="16%" />
+<col width="84%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Architecture</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>AMD GPUs HD2XXX-HD6XXX for graphics and compute shaders.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>AMD GPUs GCN GFX6 onwards for graphics and compute shaders.</td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="docutils" id="amdgpu-vendor-table">
+<caption>AMDGPU Vendors</caption>
+<colgroup>
+<col width="16%" />
+<col width="84%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Vendor</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">amd</span></tt></td>
+<td>Can be used for all AMD GPU usage.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">mesa3d</span></tt></td>
+<td>Can be used if the OS is <tt class="docutils literal"><span class="pre">mesa3d</span></tt>.</td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="docutils" id="amdgpu-os-table">
+<caption>AMDGPU Operating Systems</caption>
+<colgroup>
+<col width="19%" />
+<col width="81%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">OS</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><em>&lt;empty&gt;</em></td>
+<td>Defaults to the <em>unknown</em> OS.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">amdhsa</span></tt></td>
+<td>Compute kernels executed on HSA <a class="reference internal" href="#hsa">[HSA]</a> compatible runtimes
+such as AMD’s ROCm <a class="reference internal" href="#amd-rocm">[AMD-ROCm]</a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">amdpal</span></tt></td>
+<td>Graphic shaders and compute kernels executed on AMD PAL
+runtime.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">mesa3d</span></tt></td>
+<td>Graphic shaders and compute kernels executed on Mesa 3D
+runtime.</td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="docutils" id="amdgpu-environment-table">
+<caption>AMDGPU Environments</caption>
+<colgroup>
+<col width="16%" />
+<col width="84%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Environment</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><em>&lt;empty&gt;</em></td>
+<td>Default.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="processors">
+<span id="amdgpu-processors"></span><h3><a class="toc-backref" href="#id45">Processors</a><a class="headerlink" href="#processors" title="Permalink to this headline">¶</a></h3>
+<p>Use the <tt class="docutils literal"><span class="pre">clang</span> <span class="pre">-mcpu</span> <span class="pre">&lt;Processor&gt;</span></tt> option to specify the AMD GPU processor. The
+names from both the <em>Processor</em> and <em>Alternative Processor</em> can be used.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-processor-table">
+<caption>AMDGPU Processors</caption>
+<colgroup>
+<col width="14%" />
+<col width="19%" />
+<col width="15%" />
+<col width="6%" />
+<col width="11%" />
+<col width="9%" />
+<col width="27%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Processor</th>
+<th class="head">Alternative
+Processor</th>
+<th class="head">Target
+Triple
+Architecture</th>
+<th class="head">dGPU/
+APU</th>
+<th class="head">Target
+Features
+Supported
+[Default]</th>
+<th class="head">ROCm
+Support</th>
+<th class="head">Example
+Products</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td colspan="7"><strong>Radeon HD 2000/3000 Series (R600)</strong> <a class="reference internal" href="#amd-radeon-hd-2000-3000">[AMD-RADEON-HD-2000-3000]</a></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">r630</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">rs880</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">rv670</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>Radeon HD 4000 Series (R700)</strong> <a class="reference internal" href="#amd-radeon-hd-4000">[AMD-RADEON-HD-4000]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">rv710</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">rv730</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">rv770</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>Radeon HD 5000 Series (Evergreen)</strong> <a class="reference internal" href="#amd-radeon-hd-5000">[AMD-RADEON-HD-5000]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">cedar</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">cypress</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">juniper</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">redwood</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">sumo</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>Radeon HD 6000 Series (Northern Islands)</strong> <a class="reference internal" href="#amd-radeon-hd-6000">[AMD-RADEON-HD-6000]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">barts</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">caicos</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">cayman</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">turks</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td colspan="7"><strong>GCN GFX6 (Southern Islands (SI))</strong> <a class="reference internal" href="#amd-gcn-gfx6">[AMD-GCN-GFX6]</a></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx600</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">tahiti</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx601</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">hainan</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">oland</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">pitcairn</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">verde</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>GCN GFX7 (Sea Islands (CI))</strong> <a class="reference internal" href="#amd-gcn-gfx7">[AMD-GCN-GFX7]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx700</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">kaveri</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td> </td>
+<td> </td>
+<td><ul class="first last simple">
+<li>A6-7000</li>
+<li>A6 Pro-7050B</li>
+<li>A8-7100</li>
+<li>A8 Pro-7150B</li>
+<li>A10-7300</li>
+<li>A10 Pro-7350B</li>
+<li>FX-7500</li>
+<li>A8-7200P</li>
+<li>A10-7400P</li>
+<li>FX-7600P</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx701</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">hawaii</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>FirePro W8100</li>
+<li>FirePro W9100</li>
+<li>FirePro S9150</li>
+<li>FirePro S9170</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx702</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>Radeon R9 290</li>
+<li>Radeon R9 290x</li>
+<li>Radeon R390</li>
+<li>Radeon R390x</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx703</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">kabini</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">mullins</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td> </td>
+<td> </td>
+<td><ul class="first last simple">
+<li>E1-2100</li>
+<li>E1-2200</li>
+<li>E1-2500</li>
+<li>E2-3000</li>
+<li>E2-3800</li>
+<li>A4-5000</li>
+<li>A4-5100</li>
+<li>A6-5200</li>
+<li>A4 Pro-3340B</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx704</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">bonaire</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td> </td>
+<td> </td>
+<td><ul class="first last simple">
+<li>Radeon HD 7790</li>
+<li>Radeon HD 8770</li>
+<li>R7 260</li>
+<li>R7 260X</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>GCN GFX8 (Volcanic Islands (VI))</strong> <a class="reference internal" href="#amd-gcn-gfx8">[AMD-GCN-GFX8]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx801</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">carrizo</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td> </td>
+<td><ul class="first last simple">
+<li>A6-8500P</li>
+<li>Pro A6-8500B</li>
+<li>A8-8600P</li>
+<li>Pro A8-8600B</li>
+<li>FX-8800P</li>
+<li>Pro A12-8800B</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>A10-8700P</li>
+<li>Pro A10-8700B</li>
+<li>A10-8780P</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td> </td>
+<td><ul class="first last simple">
+<li>A10-9600P</li>
+<li>A10-9630P</li>
+<li>A12-9700P</li>
+<li>A12-9730P</li>
+<li>FX-9800P</li>
+<li>FX-9830P</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td> </td>
+<td><ul class="first last simple">
+<li>E2-9010</li>
+<li>A6-9210</li>
+<li>A9-9410</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx802</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">iceland</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">tonga</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>FirePro S7150</li>
+<li>FirePro S7100</li>
+<li>FirePro W7100</li>
+<li>Radeon R285</li>
+<li>Radeon R9 380</li>
+<li>Radeon R9 385</li>
+<li>Mobile FirePro
+M7170</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx803</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">fiji</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>Radeon R9 Nano</li>
+<li>Radeon R9 Fury</li>
+<li>Radeon R9 FuryX</li>
+<li>Radeon Pro Duo</li>
+<li>FirePro S9300x2</li>
+<li>Radeon Instinct MI8</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">polaris10</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>Radeon RX 470</li>
+<li>Radeon RX 480</li>
+<li>Radeon Instinct MI6</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">polaris11</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>Radeon RX 460</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx810</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">stoney</span></tt></li>
+</ul>
+</td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td colspan="7"><strong>GCN GFX9</strong> <a class="reference internal" href="#amd-gcn-gfx9">[AMD-GCN-GFX9]</a></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx900</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td>ROCm</td>
+<td><ul class="first last simple">
+<li>Radeon Vega
+Frontier Edition</li>
+<li>Radeon RX Vega 56</li>
+<li>Radeon RX Vega 64</li>
+<li>Radeon RX Vega 64
+Liquid</li>
+<li>Radeon Instinct MI25</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx902</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>APU</td>
+<td><ul class="first last simple">
+<li>xnack
+[on]</li>
+</ul>
+</td>
+<td> </td>
+<td><ul class="first last simple">
+<li>Ryzen 3 2200G</li>
+<li>Ryzen 5 2400G</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">gfx904</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td> </td>
+<td><em>TBA</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">gfx906</span></tt></td>
+<td> </td>
+<td><tt class="docutils literal"><span class="pre">amdgcn</span></tt></td>
+<td>dGPU</td>
+<td><ul class="first last simple">
+<li>xnack
+[off]</li>
+</ul>
+</td>
+<td> </td>
+<td><em>TBA</em></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="target-features">
+<span id="amdgpu-target-features"></span><h3><a class="toc-backref" href="#id46">Target Features</a><a class="headerlink" href="#target-features" title="Permalink to this headline">¶</a></h3>
+<p>Target features control how code is generated to support certain
+processor specific features. Not all target features are supported by
+all processors. The runtime must ensure that the features supported by
+the device used to execute the code match the features enabled when
+generating the code. A mismatch of features may result in incorrect
+execution, or a reduction in performance.</p>
+<p>The target features supported by each processor, and the default value
+used if not specified explicitly, are listed in
+<a class="reference internal" href="#amdgpu-processor-table"><em>AMDGPU Processors</em></a>.</p>
+<p>Use the <tt class="docutils literal"><span class="pre">clang</span> <span class="pre">-m[no-]&lt;TargetFeature&gt;</span></tt> option to specify the AMD GPU
+target features.</p>
+<p>For example:</p>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">-mxnack</span></tt></dt>
+<dd>Enable the <tt class="docutils literal"><span class="pre">xnack</span></tt> feature.</dd>
+<dt><tt class="docutils literal"><span class="pre">-mno-xnack</span></tt></dt>
+<dd><p class="first">Disable the <tt class="docutils literal"><span class="pre">xnack</span></tt> feature.</p>
+<table border="1" class="last docutils" id="amdgpu-target-feature-table">
+<caption>AMDGPU Target Features</caption>
+<colgroup>
+<col width="22%" />
+<col width="78%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Target Feature</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>-m[no-]xnack</td>
+<td><p class="first">Enable/disable generating code that has
+memory clauses that are compatible with
+having XNACK replay enabled.</p>
+<p class="last">This is used for demand paging and page
+migration. If XNACK replay is enabled in
+the device, then if a page fault occurs
+the code may execute incorrectly if the
+<tt class="docutils literal"><span class="pre">xnack</span></tt> feature is not enabled. Executing
+code that has the feature enabled on a
+device that does not have XNACK replay
+enabled will execute correctly, but may
+be less performant than code with the
+feature disabled.</p>
+</td>
+</tr>
+</tbody>
+</table>
+</dd>
+</dl>
+</div>
+<div class="section" id="address-spaces">
+<span id="amdgpu-address-spaces"></span><h3><a class="toc-backref" href="#id47">Address Spaces</a><a class="headerlink" href="#address-spaces" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend uses the following address space mappings.</p>
+<p>The memory space names used in the table, aside from the region memory space, are
+from the OpenCL standard.</p>
+<p>LLVM Address Space number is used throughout LLVM (for example, in LLVM IR).</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-address-space-mapping-table">
+<caption>Address Space Mapping</caption>
+<colgroup>
+<col width="51%" />
+<col width="49%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">LLVM Address Space</th>
+<th class="head">Memory Space</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>0</td>
+<td>Generic (Flat)</td>
+</tr>
+<tr class="row-odd"><td>1</td>
+<td>Global</td>
+</tr>
+<tr class="row-even"><td>2</td>
+<td>Region (GDS)</td>
+</tr>
+<tr class="row-odd"><td>3</td>
+<td>Local (group/LDS)</td>
+</tr>
+<tr class="row-even"><td>4</td>
+<td>Constant</td>
+</tr>
+<tr class="row-odd"><td>5</td>
+<td>Private (Scratch)</td>
+</tr>
+<tr class="row-even"><td>6</td>
+<td>Constant 32-bit</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="memory-scopes">
+<span id="amdgpu-memory-scopes"></span><h3><a class="toc-backref" href="#id48">Memory Scopes</a><a class="headerlink" href="#memory-scopes" title="Permalink to this headline">¶</a></h3>
+<p>This section provides LLVM memory synchronization scopes supported by the AMDGPU
+backend memory model when the target triple OS is <tt class="docutils literal"><span class="pre">amdhsa</span></tt> (see
+<a class="reference internal" href="#amdgpu-amdhsa-memory-model"><em>Memory Model</em></a> and <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</p>
+<p>The memory model supported is based on the HSA memory model <a class="reference internal" href="#hsa">[HSA]</a> which is
+based in turn on HRF-indirect with scope inclusion <a class="reference internal" href="#hrf">[HRF]</a>. The happens-before
+relation is transitive over the synchronizes-with relation independent of scope,
+and synchronizes-with allows the memory scope instances to be inclusive (see
+table <a class="reference internal" href="#amdgpu-amdhsa-llvm-sync-scopes-table"><em>AMDHSA LLVM Sync Scopes</em></a>).</p>
+<p>This is different to the OpenCL <a class="reference internal" href="#id41">[OpenCL]</a> memory model which does not have scope
+inclusion and requires the memory scopes to exactly match. However, this
+is conservatively correct for OpenCL.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-llvm-sync-scopes-table">
+<caption>AMDHSA LLVM Sync Scopes</caption>
+<colgroup>
+<col width="22%" />
+<col width="78%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">LLVM Sync Scope</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><em>none</em></td>
+<td><p class="first">The default: <tt class="docutils literal"><span class="pre">system</span></tt>.</p>
+<p>Synchronizes with, and participates in modification and
+seq_cst total orderings with, other operations (except
+image operations) for all address spaces (except private,
+or generic that accesses private) provided the other
+operation’s sync scope is:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">system</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">agent</span></tt> and executed by a thread on the same agent.</li>
+<li><tt class="docutils literal"><span class="pre">workgroup</span></tt> and executed by a thread in the same
+workgroup.</li>
+<li><tt class="docutils literal"><span class="pre">wavefront</span></tt> and executed by a thread in the same
+wavefront.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">agent</span></tt></td>
+<td><p class="first">Synchronizes with, and participates in modification and
+seq_cst total orderings with, other operations (except
+image operations) for all address spaces (except private,
+or generic that accesses private) provided the other
+operation’s sync scope is:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">system</span></tt> or <tt class="docutils literal"><span class="pre">agent</span></tt> and executed by a thread on the
+same agent.</li>
+<li><tt class="docutils literal"><span class="pre">workgroup</span></tt> and executed by a thread in the same
+workgroup.</li>
+<li><tt class="docutils literal"><span class="pre">wavefront</span></tt> and executed by a thread in the same
+wavefront.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">workgroup</span></tt></td>
+<td><p class="first">Synchronizes with, and participates in modification and
+seq_cst total orderings with, other operations (except
+image operations) for all address spaces (except private,
+or generic that accesses private) provided the other
+operation’s sync scope is:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">system</span></tt>, <tt class="docutils literal"><span class="pre">agent</span></tt> or <tt class="docutils literal"><span class="pre">workgroup</span></tt> and executed by a
+thread in the same workgroup.</li>
+<li><tt class="docutils literal"><span class="pre">wavefront</span></tt> and executed by a thread in the same
+wavefront.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">wavefront</span></tt></td>
+<td><p class="first">Synchronizes with, and participates in modification and
+seq_cst total orderings with, other operations (except
+image operations) for all address spaces (except private,
+or generic that accesses private) provided the other
+operation’s sync scope is:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">system</span></tt>, <tt class="docutils literal"><span class="pre">agent</span></tt>, <tt class="docutils literal"><span class="pre">workgroup</span></tt> or <tt class="docutils literal"><span class="pre">wavefront</span></tt>
+and executed by a thread in the same wavefront.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">singlethread</span></tt></td>
+<td>Only synchronizes with, and participates in modification
+and seq_cst total orderings with, other operations (except
+image operations) running in the same thread for all
+address spaces (for example, in signal handlers).</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="amdgpu-intrinsics">
+<h3><a class="toc-backref" href="#id49">AMDGPU Intrinsics</a><a class="headerlink" href="#amdgpu-intrinsics" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend implements the following LLVM IR intrinsics.</p>
+<p><em>This section is WIP.</em></p>
+</div>
+<div class="section" id="amdgpu-attributes">
+<h3><a class="toc-backref" href="#id50">AMDGPU Attributes</a><a class="headerlink" href="#amdgpu-attributes" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend supports the following LLVM IR attributes.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-llvm-ir-attributes-table">
+<caption>AMDGPU LLVM IR Attributes</caption>
+<colgroup>
+<col width="36%" />
+<col width="64%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">LLVM Attribute</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>"amdgpu-flat-work-group-size"="min,max"</td>
+<td>Specify the minimum and maximum flat work group sizes that
+will be specified when the kernel is dispatched. Generated
+by the <tt class="docutils literal"><span class="pre">amdgpu_flat_work_group_size</span></tt> CLANG attribute <a class="reference internal" href="#clang-attr">[CLANG-ATTR]</a>.</td>
+</tr>
+<tr class="row-odd"><td>"amdgpu-implicitarg-num-bytes"="n"</td>
+<td>Number of kernel argument bytes to add to the kernel
+argument block size for the implicit arguments. This
+varies by OS and language (for OpenCL see
+<a class="reference internal" href="#opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table"><em>OpenCL kernel implicit arguments appended for AMDHSA OS</em></a>).</td>
+</tr>
+<tr class="row-even"><td>"amdgpu-max-work-group-size"="n"</td>
+<td>Specify the maximum work-group size that will be specified
+when the kernel is dispatched.</td>
+</tr>
+<tr class="row-odd"><td>"amdgpu-num-sgpr"="n"</td>
+<td>Specifies the number of SGPRs to use. Generated by
+the <tt class="docutils literal"><span class="pre">amdgpu_num_sgpr</span></tt> CLANG attribute <a class="reference internal" href="#clang-attr">[CLANG-ATTR]</a>.</td>
+</tr>
+<tr class="row-even"><td>"amdgpu-num-vgpr"="n"</td>
+<td>Specifies the number of VGPRs to use. Generated by the
+<tt class="docutils literal"><span class="pre">amdgpu_num_vgpr</span></tt> CLANG attribute <a class="reference internal" href="#clang-attr">[CLANG-ATTR]</a>.</td>
+</tr>
+<tr class="row-odd"><td>"amdgpu-waves-per-eu"="m,n"</td>
+<td>Specify the minimum and maximum number of waves per
+execution unit. Generated by the <tt class="docutils literal"><span class="pre">amdgpu_waves_per_eu</span></tt>
+CLANG attribute <a class="reference internal" href="#clang-attr">[CLANG-ATTR]</a>.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="code-object">
+<h2><a class="toc-backref" href="#id51">Code Object</a><a class="headerlink" href="#code-object" title="Permalink to this headline">¶</a></h2>
+<p>The AMDGPU backend generates a standard ELF <a class="reference internal" href="#elf">[ELF]</a> relocatable code object that
+can be linked by <tt class="docutils literal"><span class="pre">lld</span></tt> to produce a standard ELF shared code object which can
+be loaded and executed on an AMDGPU target.</p>
+<div class="section" id="header">
+<h3><a class="toc-backref" href="#id52">Header</a><a class="headerlink" href="#header" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend uses the following ELF header:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-header-table">
+<caption>AMDGPU ELF Header</caption>
+<colgroup>
+<col width="38%" />
+<col width="62%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Field</th>
+<th class="head">Value</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">e_ident[EI_CLASS]</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">ELFCLASS64</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">e_ident[EI_DATA]</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">ELFDATA2LSB</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">e_ident[EI_OSABI]</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_NONE</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_HSA</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_PAL</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_MESA3D</span></tt></li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">e_ident[EI_ABIVERSION]</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_HSA</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_PAL</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_MESA3D</span></tt></li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">e_type</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">ET_REL</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">ET_DYN</span></tt></li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">e_machine</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">EM_AMDGPU</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">e_entry</span></tt></td>
+<td>0</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">e_flags</span></tt></td>
+<td>See <a class="reference internal" href="#amdgpu-elf-header-e-flags-table"><em>AMDGPU ELF Header e_flags</em></a></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-header-enumeration-values-table">
+<caption>AMDGPU ELF Header Enumeration Values</caption>
+<colgroup>
+<col width="86%" />
+<col width="14%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Value</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EM_AMDGPU</span></tt></td>
+<td>224</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">ELFOSABI_NONE</span></tt></td>
+<td>0</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_HSA</span></tt></td>
+<td>64</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_PAL</span></tt></td>
+<td>65</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_MESA3D</span></tt></td>
+<td>66</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_HSA</span></tt></td>
+<td>1</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_PAL</span></tt></td>
+<td>0</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_MESA3D</span></tt></td>
+<td>0</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">e_ident[EI_CLASS]</span></tt></dt>
+<dd><p class="first">The ELF class is:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">ELFCLASS32</span></tt> for <tt class="docutils literal"><span class="pre">r600</span></tt> architecture.</li>
+<li><tt class="docutils literal"><span class="pre">ELFCLASS64</span></tt> for <tt class="docutils literal"><span class="pre">amdgcn</span></tt> architecture which only supports 64
+bit applications.</li>
+</ul>
+</dd>
+<dt><tt class="docutils literal"><span class="pre">e_ident[EI_DATA]</span></tt></dt>
+<dd>All AMDGPU targets use <tt class="docutils literal"><span class="pre">ELFDATA2LSB</span></tt> for little-endian byte ordering.</dd>
+<dt><tt class="docutils literal"><span class="pre">e_ident[EI_OSABI]</span></tt></dt>
+<dd><p class="first">One of the following AMD GPU architecture specific OS ABIs
+(see <a class="reference internal" href="#amdgpu-os-table"><em>AMDGPU Operating Systems</em></a>):</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_NONE</span></tt> for <em>unknown</em> OS.</li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_HSA</span></tt> for <tt class="docutils literal"><span class="pre">amdhsa</span></tt> OS.</li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_PAL</span></tt> for <tt class="docutils literal"><span class="pre">amdpal</span></tt> OS.</li>
+<li><tt class="docutils literal"><span class="pre">ELFOSABI_AMDGPU_MESA3D</span></tt> for <tt class="docutils literal"><span class="pre">mesa3D</span></tt> OS.</li>
+</ul>
+</dd>
+<dt><tt class="docutils literal"><span class="pre">e_ident[EI_ABIVERSION]</span></tt></dt>
+<dd><p class="first">The ABI version of the AMD GPU architecture specific OS ABI to which the code
+object conforms:</p>
+<ul class="last simple">
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_HSA</span></tt> is used to specify the version of AMD HSA
+runtime ABI.</li>
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_PAL</span></tt> is used to specify the version of AMD PAL
+runtime ABI.</li>
+<li><tt class="docutils literal"><span class="pre">ELFABIVERSION_AMDGPU_MESA3D</span></tt> is used to specify the version of AMD MESA
+3D runtime ABI.</li>
+</ul>
+</dd>
+<dt><tt class="docutils literal"><span class="pre">e_type</span></tt></dt>
+<dd><p class="first">Can be one of the following values:</p>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">ET_REL</span></tt></dt>
+<dd>The type produced by the AMD GPU backend compiler as it is a relocatable code
+object.</dd>
+<dt><tt class="docutils literal"><span class="pre">ET_DYN</span></tt></dt>
+<dd>The type produced by the linker as it is a shared code object.</dd>
+</dl>
+<p class="last">The AMD HSA runtime loader requires an <tt class="docutils literal"><span class="pre">ET_DYN</span></tt> code object.</p>
+</dd>
+<dt><tt class="docutils literal"><span class="pre">e_machine</span></tt></dt>
+<dd>The value <tt class="docutils literal"><span class="pre">EM_AMDGPU</span></tt> is used for the machine for all processors supported
+by the <tt class="docutils literal"><span class="pre">r600</span></tt> and <tt class="docutils literal"><span class="pre">amdgcn</span></tt> architectures (see
+<a class="reference internal" href="#amdgpu-processor-table"><em>AMDGPU Processors</em></a>). The specific processor is specified in the
+<tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH</span></tt> bit field of the <tt class="docutils literal"><span class="pre">e_flags</span></tt> (see
+<a class="reference internal" href="#amdgpu-elf-header-e-flags-table"><em>AMDGPU ELF Header e_flags</em></a>).</dd>
+<dt><tt class="docutils literal"><span class="pre">e_entry</span></tt></dt>
+<dd>The entry point is 0 as the entry points for individual kernels must be
+selected in order to invoke them through AQL packets.</dd>
+<dt><tt class="docutils literal"><span class="pre">e_flags</span></tt></dt>
+<dd><p class="first">The AMDGPU backend uses the following ELF header flags:</p>
+<table border="1" class="docutils" id="amdgpu-elf-header-e-flags-table">
+<caption>AMDGPU ELF Header <tt class="docutils literal"><span class="pre">e_flags</span></tt></caption>
+<colgroup>
+<col width="42%" />
+<col width="13%" />
+<col width="45%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Value</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td colspan="2"><strong>AMDGPU Processor Flag</strong></td>
+<td>See <a class="reference internal" href="#amdgpu-processor-table"><em>AMDGPU Processors</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH</span></tt></td>
+<td>0x000000ff</td>
+<td>AMDGPU processor selection
+mask for
+<tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_xxx</span></tt> values
+defined in
+<a class="reference internal" href="#amdgpu-ef-amdgpu-mach-table"><em>AMDGPU EF_AMDGPU_MACH Values</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_XNACK</span></tt></td>
+<td>0x00000100</td>
+<td>Indicates if the <tt class="docutils literal"><span class="pre">xnack</span></tt>
+target feature is
+enabled for all code
+contained in the code object.
+If the processor
+does not support the
+<tt class="docutils literal"><span class="pre">xnack</span></tt> target
+feature then must
+be 0.
+See
+<a class="reference internal" href="#amdgpu-target-features"><em>Target Features</em></a>.</td>
+</tr>
+</tbody>
+</table>
+<table border="1" class="last docutils" id="amdgpu-ef-amdgpu-mach-table">
+<caption>AMDGPU <tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH</span></tt> Values</caption>
+<colgroup>
+<col width="45%" />
+<col width="14%" />
+<col width="41%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Value</th>
+<th class="head">Description (see
+<a class="reference internal" href="#amdgpu-processor-table"><em>AMDGPU Processors</em></a>)</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_NONE</span></tt></td>
+<td>0x000</td>
+<td><em>not specified</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_R600</span></tt></td>
+<td>0x001</td>
+<td><tt class="docutils literal"><span class="pre">r600</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_R630</span></tt></td>
+<td>0x002</td>
+<td><tt class="docutils literal"><span class="pre">r630</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_RS880</span></tt></td>
+<td>0x003</td>
+<td><tt class="docutils literal"><span class="pre">rs880</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_RV670</span></tt></td>
+<td>0x004</td>
+<td><tt class="docutils literal"><span class="pre">rv670</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_RV710</span></tt></td>
+<td>0x005</td>
+<td><tt class="docutils literal"><span class="pre">rv710</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_RV730</span></tt></td>
+<td>0x006</td>
+<td><tt class="docutils literal"><span class="pre">rv730</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_RV770</span></tt></td>
+<td>0x007</td>
+<td><tt class="docutils literal"><span class="pre">rv770</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_CEDAR</span></tt></td>
+<td>0x008</td>
+<td><tt class="docutils literal"><span class="pre">cedar</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_CYPRESS</span></tt></td>
+<td>0x009</td>
+<td><tt class="docutils literal"><span class="pre">cypress</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_JUNIPER</span></tt></td>
+<td>0x00a</td>
+<td><tt class="docutils literal"><span class="pre">juniper</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_REDWOOD</span></tt></td>
+<td>0x00b</td>
+<td><tt class="docutils literal"><span class="pre">redwood</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_SUMO</span></tt></td>
+<td>0x00c</td>
+<td><tt class="docutils literal"><span class="pre">sumo</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_BARTS</span></tt></td>
+<td>0x00d</td>
+<td><tt class="docutils literal"><span class="pre">barts</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_CAICOS</span></tt></td>
+<td>0x00e</td>
+<td><tt class="docutils literal"><span class="pre">caicos</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_CAYMAN</span></tt></td>
+<td>0x00f</td>
+<td><tt class="docutils literal"><span class="pre">cayman</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_R600_TURKS</span></tt></td>
+<td>0x010</td>
+<td><tt class="docutils literal"><span class="pre">turks</span></tt></td>
+</tr>
+<tr class="row-odd"><td><em>reserved</em></td>
+<td>0x011 -
+0x01f</td>
+<td>Reserved for <tt class="docutils literal"><span class="pre">r600</span></tt>
+architecture processors.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX600</span></tt></td>
+<td>0x020</td>
+<td><tt class="docutils literal"><span class="pre">gfx600</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX601</span></tt></td>
+<td>0x021</td>
+<td><tt class="docutils literal"><span class="pre">gfx601</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX700</span></tt></td>
+<td>0x022</td>
+<td><tt class="docutils literal"><span class="pre">gfx700</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX701</span></tt></td>
+<td>0x023</td>
+<td><tt class="docutils literal"><span class="pre">gfx701</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX702</span></tt></td>
+<td>0x024</td>
+<td><tt class="docutils literal"><span class="pre">gfx702</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX703</span></tt></td>
+<td>0x025</td>
+<td><tt class="docutils literal"><span class="pre">gfx703</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX704</span></tt></td>
+<td>0x026</td>
+<td><tt class="docutils literal"><span class="pre">gfx704</span></tt></td>
+</tr>
+<tr class="row-odd"><td><em>reserved</em></td>
+<td>0x027</td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX801</span></tt></td>
+<td>0x028</td>
+<td><tt class="docutils literal"><span class="pre">gfx801</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX802</span></tt></td>
+<td>0x029</td>
+<td><tt class="docutils literal"><span class="pre">gfx802</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX803</span></tt></td>
+<td>0x02a</td>
+<td><tt class="docutils literal"><span class="pre">gfx803</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX810</span></tt></td>
+<td>0x02b</td>
+<td><tt class="docutils literal"><span class="pre">gfx810</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX900</span></tt></td>
+<td>0x02c</td>
+<td><tt class="docutils literal"><span class="pre">gfx900</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX902</span></tt></td>
+<td>0x02d</td>
+<td><tt class="docutils literal"><span class="pre">gfx902</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX904</span></tt></td>
+<td>0x02e</td>
+<td><tt class="docutils literal"><span class="pre">gfx904</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">EF_AMDGPU_MACH_AMDGCN_GFX906</span></tt></td>
+<td>0x02f</td>
+<td><tt class="docutils literal"><span class="pre">gfx906</span></tt></td>
+</tr>
+<tr class="row-even"><td><em>reserved</em></td>
+<td>0x030</td>
+<td>Reserved.</td>
+</tr>
+</tbody>
+</table>
+</dd>
+</dl>
+</div>
+<div class="section" id="sections">
+<h3><a class="toc-backref" href="#id53">Sections</a><a class="headerlink" href="#sections" title="Permalink to this headline">¶</a></h3>
+<p>An AMDGPU target ELF code object has the standard ELF sections which include:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-sections-table">
+<caption>AMDGPU ELF Sections</caption>
+<colgroup>
+<col width="27%" />
+<col width="24%" />
+<col width="49%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Type</th>
+<th class="head">Attributes</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.bss</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_NOBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt> + <tt class="docutils literal"><span class="pre">SHF_WRITE</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.data</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt> + <tt class="docutils literal"><span class="pre">SHF_WRITE</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.debug_</span></tt><em>*</em></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.dynamic</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_DYNAMIC</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.dynstr</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.dynsym</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.got</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt> + <tt class="docutils literal"><span class="pre">SHF_WRITE</span></tt></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.hash</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_HASH</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.note</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_NOTE</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.rela</span></tt><em>name</em></td>
+<td><tt class="docutils literal"><span class="pre">SHT_RELA</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.rela.dyn</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_RELA</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.rodata</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.shstrtab</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_STRTAB</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.strtab</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_STRTAB</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.symtab</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_SYMTAB</span></tt></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.text</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHT_PROGBITS</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">SHF_ALLOC</span></tt> + <tt class="docutils literal"><span class="pre">SHF_EXECINSTR</span></tt></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>These sections have their standard meanings (see <a class="reference internal" href="#elf">[ELF]</a>) and are only generated
+if needed.</p>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">.debug_</span></tt><em>*</em></dt>
+<dd>The standard DWARF sections. See <a class="reference internal" href="#amdgpu-dwarf"><em>DWARF</em></a> for information on the
+DWARF produced by the AMDGPU backend.</dd>
+<dt><tt class="docutils literal"><span class="pre">.dynamic</span></tt>, <tt class="docutils literal"><span class="pre">.dynstr</span></tt>, <tt class="docutils literal"><span class="pre">.dynsym</span></tt>, <tt class="docutils literal"><span class="pre">.hash</span></tt></dt>
+<dd>The standard sections used by a dynamic loader.</dd>
+<dt><tt class="docutils literal"><span class="pre">.note</span></tt></dt>
+<dd>See <a class="reference internal" href="#amdgpu-note-records"><em>Note Records</em></a> for the note records supported by the AMDGPU
+backend.</dd>
+<dt><tt class="docutils literal"><span class="pre">.rela</span></tt><em>name</em>, <tt class="docutils literal"><span class="pre">.rela.dyn</span></tt></dt>
+<dd><p class="first">For relocatable code objects, <em>name</em> is the name of the section that the
+relocation records apply to. For example, <tt class="docutils literal"><span class="pre">.rela.text</span></tt> is the section name for
+relocation records associated with the <tt class="docutils literal"><span class="pre">.text</span></tt> section.</p>
+<p>For linked shared code objects, <tt class="docutils literal"><span class="pre">.rela.dyn</span></tt> contains all the relocation
+records from each of the relocatable code object’s <tt class="docutils literal"><span class="pre">.rela</span></tt><em>name</em> sections.</p>
+<p class="last">See <a class="reference internal" href="#amdgpu-relocation-records"><em>Relocation Records</em></a> for the relocation records supported by
+the AMDGPU backend.</p>
+</dd>
+<dt><tt class="docutils literal"><span class="pre">.text</span></tt></dt>
+<dd>The executable machine code for the kernels and functions they call. Generated
+as position independent code. See <a class="reference internal" href="#amdgpu-code-conventions"><em>Code Conventions</em></a> for
+information on conventions used in the isa generation.</dd>
+</dl>
+</div>
+<div class="section" id="note-records">
+<span id="amdgpu-note-records"></span><h3><a class="toc-backref" href="#id54">Note Records</a><a class="headerlink" href="#note-records" title="Permalink to this headline">¶</a></h3>
+<p>As required by <tt class="docutils literal"><span class="pre">ELFCLASS32</span></tt> and <tt class="docutils literal"><span class="pre">ELFCLASS64</span></tt>, minimal zero byte padding must
+be generated after the <tt class="docutils literal"><span class="pre">name</span></tt> field to ensure the <tt class="docutils literal"><span class="pre">desc</span></tt> field is 4 byte
+aligned. In addition, minimal zero byte padding must be generated to ensure the
+<tt class="docutils literal"><span class="pre">desc</span></tt> field size is a multiple of 4 bytes. The <tt class="docutils literal"><span class="pre">sh_addralign</span></tt> field of the
+<tt class="docutils literal"><span class="pre">.note</span></tt> section must be at least 4 to indicate at least 4 byte alignment.</p>
+<p>The AMDGPU backend code object uses the following ELF note records in the
+<tt class="docutils literal"><span class="pre">.note</span></tt> section. The <em>Description</em> column specifies the layout of the note
+record’s <tt class="docutils literal"><span class="pre">desc</span></tt> field. All fields are consecutive bytes. Note records with
+variable size strings have a corresponding <tt class="docutils literal"><span class="pre">*_size</span></tt> field that specifies the
+number of bytes, including the terminating null character, in the string. The
+string(s) come immediately after the preceding fields.</p>
+<p>Additional note records can be present.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-note-records-table">
+<caption>AMDGPU ELF Note Records</caption>
+<colgroup>
+<col width="7%" />
+<col width="41%" />
+<col width="52%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Type</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“AMD”</td>
+<td><tt class="docutils literal"><span class="pre">NT_AMD_AMDGPU_HSA_METADATA</span></tt></td>
+<td>&lt;metadata null terminated string&gt;</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-note-record-enumeration-values-table">
+<caption>AMDGPU ELF Note Record Enumeration Values</caption>
+<colgroup>
+<col width="86%" />
+<col width="14%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Value</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><em>reserved</em></td>
+<td>0-9</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">NT_AMD_AMDGPU_HSA_METADATA</span></tt></td>
+<td>10</td>
+</tr>
+<tr class="row-even"><td><em>reserved</em></td>
+<td>11</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">NT_AMD_AMDGPU_HSA_METADATA</span></tt></dt>
+<dd>Specifies extensible metadata associated with the code objects executed on HSA
+<a class="reference internal" href="#hsa">[HSA]</a> compatible runtimes such as AMD’s ROCm <a class="reference internal" href="#amd-rocm">[AMD-ROCm]</a>. It is required when
+the target triple OS is <tt class="docutils literal"><span class="pre">amdhsa</span></tt> (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>). See
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-metadata"><em>Code Object Metadata</em></a> for the syntax of the code
+object metadata string.</dd>
+</dl>
+</div>
+<div class="section" id="symbols">
+<span id="amdgpu-symbols"></span><h3><a class="toc-backref" href="#id55">Symbols</a><a class="headerlink" href="#symbols" title="Permalink to this headline">¶</a></h3>
+<p>Symbols include the following:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-symbols-table">
+<caption>AMDGPU ELF Symbols</caption>
+<colgroup>
+<col width="32%" />
+<col width="21%" />
+<col width="20%" />
+<col width="27%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Name</th>
+<th class="head">Type</th>
+<th class="head">Section</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><em>link-name</em></td>
+<td><tt class="docutils literal"><span class="pre">STT_OBJECT</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">.data</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">.rodata</span></tt></li>
+<li><tt class="docutils literal"><span class="pre">.bss</span></tt></li>
+</ul>
+</td>
+<td>Global variable</td>
+</tr>
+<tr class="row-odd"><td><em>link-name</em><tt class="docutils literal"><span class="pre">.kd</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">STT_OBJECT</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">.rodata</span></tt></li>
+</ul>
+</td>
+<td>Kernel descriptor</td>
+</tr>
+<tr class="row-even"><td><em>link-name</em></td>
+<td><tt class="docutils literal"><span class="pre">STT_FUNC</span></tt></td>
+<td><ul class="first last simple">
+<li><tt class="docutils literal"><span class="pre">.text</span></tt></li>
+</ul>
+</td>
+<td>Kernel entry point</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<dl class="docutils">
+<dt>Global variable</dt>
+<dd><p class="first">Global variables both used and defined by the compilation unit.</p>
+<p>If the symbol is defined in the compilation unit then it is allocated in the
+appropriate section according to whether it has initialized data or is read-only.</p>
+<p>If the symbol is external then its section is <tt class="docutils literal"><span class="pre">STN_UNDEF</span></tt> and the loader
+will resolve relocations using the definition provided by another code object
+or explicitly defined by the runtime.</p>
+<p class="last">All global symbols, whether defined in the compilation unit or external, are
+accessed by the machine code indirectly through a GOT table entry. This
+allows them to be preemptable. The GOT table is only supported when the target
+triple OS is <tt class="docutils literal"><span class="pre">amdhsa</span></tt> (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</p>
+</dd>
+<dt>Kernel descriptor</dt>
+<dd>Every HSA kernel has an associated kernel descriptor. It is the address of the
+kernel descriptor that is used in the AQL dispatch packet used to invoke the
+kernel, not the kernel entry point. The layout of the HSA kernel descriptor is
+defined in <a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>.</dd>
+<dt>Kernel entry point</dt>
+<dd>Every HSA kernel also has a symbol for its machine code entry point.</dd>
+</dl>
+</div>
+<div class="section" id="relocation-records">
+<span id="amdgpu-relocation-records"></span><h3><a class="toc-backref" href="#id56">Relocation Records</a><a class="headerlink" href="#relocation-records" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend generates <tt class="docutils literal"><span class="pre">Elf64_Rela</span></tt> relocation records. Supported
+relocatable fields are:</p>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">word32</span></tt></dt>
+<dd>This specifies a 32-bit field occupying 4 bytes with arbitrary byte
+alignment. These values use the same byte order as other word values in the
+AMD GPU architecture.</dd>
+<dt><tt class="docutils literal"><span class="pre">word64</span></tt></dt>
+<dd>This specifies a 64-bit field occupying 8 bytes with arbitrary byte
+alignment. These values use the same byte order as other word values in the
+AMD GPU architecture.</dd>
+</dl>
+<p>The following notations are used for specifying relocation calculations:</p>
+<dl class="docutils">
+<dt><strong>A</strong></dt>
+<dd>Represents the addend used to compute the value of the relocatable field.</dd>
+<dt><strong>G</strong></dt>
+<dd>Represents the offset into the global offset table at which the relocation
+entry’s symbol will reside during execution.</dd>
+<dt><strong>GOT</strong></dt>
+<dd>Represents the address of the global offset table.</dd>
+<dt><strong>P</strong></dt>
+<dd>Represents the place (section offset for <tt class="docutils literal"><span class="pre">et_rel</span></tt> or address for <tt class="docutils literal"><span class="pre">et_dyn</span></tt>)
+of the storage unit being relocated (computed using <tt class="docutils literal"><span class="pre">r_offset</span></tt>).</dd>
+<dt><strong>S</strong></dt>
+<dd>Represents the value of the symbol whose index resides in the relocation
+entry. Relocations not using this must specify a symbol index of <tt class="docutils literal"><span class="pre">STN_UNDEF</span></tt>.</dd>
+<dt><strong>B</strong></dt>
+<dd>Represents the base address of a loaded executable or shared object which is
+the difference between the ELF address and the actual load address. Relocations
+using this are only valid in executable or shared objects.</dd>
+</dl>
+<p>The following relocation types are supported:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-elf-relocation-records-table">
+<caption>AMDGPU ELF Relocation Records</caption>
+<colgroup>
+<col width="33%" />
+<col width="9%" />
+<col width="6%" />
+<col width="13%" />
+<col width="38%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Relocation Type</th>
+<th class="head">Kind</th>
+<th class="head">Value</th>
+<th class="head">Field</th>
+<th class="head">Calculation</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_NONE</span></tt></td>
+<td> </td>
+<td>0</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32_LO</span></tt></td>
+<td>Static,
+Dynamic</td>
+<td>1</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(S + A) &amp; 0xFFFFFFFF</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32_HI</span></tt></td>
+<td>Static,
+Dynamic</td>
+<td>2</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(S + A) >> 32</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_ABS64</span></tt></td>
+<td>Static,
+Dynamic</td>
+<td>3</td>
+<td><tt class="docutils literal"><span class="pre">word64</span></tt></td>
+<td>S + A</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_REL32</span></tt></td>
+<td>Static</td>
+<td>4</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>S + A - P</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_REL64</span></tt></td>
+<td>Static</td>
+<td>5</td>
+<td><tt class="docutils literal"><span class="pre">word64</span></tt></td>
+<td>S + A - P</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32</span></tt></td>
+<td>Static,
+Dynamic</td>
+<td>6</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>S + A</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_GOTPCREL</span></tt></td>
+<td>Static</td>
+<td>7</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>G + GOT + A - P</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_GOTPCREL32_LO</span></tt></td>
+<td>Static</td>
+<td>8</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(G + GOT + A - P) &amp; 0xFFFFFFFF</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_GOTPCREL32_HI</span></tt></td>
+<td>Static</td>
+<td>9</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(G + GOT + A - P) >> 32</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_REL32_LO</span></tt></td>
+<td>Static</td>
+<td>10</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(S + A - P) &amp; 0xFFFFFFFF</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_REL32_HI</span></tt></td>
+<td>Static</td>
+<td>11</td>
+<td><tt class="docutils literal"><span class="pre">word32</span></tt></td>
+<td>(S + A - P) >> 32</td>
+</tr>
+<tr class="row-even"><td><em>reserved</em></td>
+<td> </td>
+<td>12</td>
+<td> </td>
+<td> </td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">R_AMDGPU_RELATIVE64</span></tt></td>
+<td>Dynamic</td>
+<td>13</td>
+<td><tt class="docutils literal"><span class="pre">word64</span></tt></td>
+<td>B + A</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p><tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32_LO</span></tt> and <tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32_HI</span></tt> are only supported by
+the <tt class="docutils literal"><span class="pre">mesa3d</span></tt> OS, which does not support <tt class="docutils literal"><span class="pre">R_AMDGPU_ABS64</span></tt>.</p>
+<p>There is no current OS loader support for 32 bit programs and so
+<tt class="docutils literal"><span class="pre">R_AMDGPU_ABS32</span></tt> is not used.</p>
+</div>
+<div class="section" id="dwarf">
+<span id="amdgpu-dwarf"></span><h3><a class="toc-backref" href="#id57">DWARF</a><a class="headerlink" href="#dwarf" title="Permalink to this headline">¶</a></h3>
+<p>Standard DWARF <a class="reference internal" href="#id40">[DWARF]</a> Version 5 sections can be generated. These contain
+information that maps the code object executable code and data to the source
+language constructs. It can be used by tools such as debuggers and profilers.</p>
+<div class="section" id="address-space-mapping">
+<h4><a class="toc-backref" href="#id58">Address Space Mapping</a><a class="headerlink" href="#address-space-mapping" title="Permalink to this headline">¶</a></h4>
+<p>The following address space mapping is used:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-dwarf-address-space-mapping-table">
+<caption>AMDGPU DWARF Address Space Mapping</caption>
+<colgroup>
+<col width="53%" />
+<col width="47%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">DWARF Address Space</th>
+<th class="head">Memory Space</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>1</td>
+<td>Private (Scratch)</td>
+</tr>
+<tr class="row-odd"><td>2</td>
+<td>Local (group/LDS)</td>
+</tr>
+<tr class="row-even"><td><em>omitted</em></td>
+<td>Global</td>
+</tr>
+<tr class="row-odd"><td><em>omitted</em></td>
+<td>Constant</td>
+</tr>
+<tr class="row-even"><td><em>omitted</em></td>
+<td>Generic (Flat)</td>
+</tr>
+<tr class="row-odd"><td><em>not supported</em></td>
+<td>Region (GDS)</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>See <a class="reference internal" href="#amdgpu-address-spaces"><em>Address Spaces</em></a> for information on the memory space terminology
+used in the table.</p>
+<p>An <tt class="docutils literal"><span class="pre">address_class</span></tt> attribute is generated on pointer type DIEs to specify the
+DWARF address space of the value of the pointer when it is in the <em>private</em> or
+<em>local</em> address space. Otherwise the attribute is omitted.</p>
+<p>An <tt class="docutils literal"><span class="pre">XDEREF</span></tt> operation is generated in location list expressions for variables
+that are allocated in the <em>private</em> and <em>local</em> address space. Otherwise no
+<tt class="docutils literal"><span class="pre">XDEREF</span></tt> is generated.</p>
+</div>
+<div class="section" id="register-mapping">
+<h4><a class="toc-backref" href="#id59">Register Mapping</a><a class="headerlink" href="#register-mapping" title="Permalink to this headline">¶</a></h4>
+<p><em>This section is WIP.</em></p>
+</div>
+<div class="section" id="source-text">
+<h4><a class="toc-backref" href="#id60">Source Text</a><a class="headerlink" href="#source-text" title="Permalink to this headline">¶</a></h4>
+<p>Source text for online-compiled programs (e.g. those compiled by the OpenCL
+runtime) may be embedded into the DWARF v5 line table using the <tt class="docutils literal"><span class="pre">clang</span>
+<span class="pre">-gembed-source</span></tt> option, described in table <a class="reference internal" href="#amdgpu-debug-options"><em>AMDGPU Debug Options</em></a>.</p>
+<p>For example:</p>
+<dl class="docutils">
+<dt><tt class="docutils literal"><span class="pre">-gembed-source</span></tt></dt>
+<dd>Enable the embedded source DWARF v5 extension.</dd>
+<dt><tt class="docutils literal"><span class="pre">-gno-embed-source</span></tt></dt>
+<dd><p class="first">Disable the embedded source DWARF v5 extension.</p>
+<table border="1" class="last docutils" id="amdgpu-debug-options">
+<caption>AMDGPU Debug Options</caption>
+<colgroup>
+<col width="29%" />
+<col width="71%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Debug Flag</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>-g[no-]embed-source</td>
+<td>Enable/disable embedding source text in DWARF
+debug sections. Useful for environments where
+source cannot be written to disk, such as
+when performing online compilation.</td>
+</tr>
+</tbody>
+</table>
+</dd>
+</dl>
+<p>This option enables one extended content type in the DWARF v5 Line Number
+Program Header, which is used to encode embedded source.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-dwarf-extended-content-types">
+<caption>AMDGPU DWARF Line Number Program Header Extended Content Types</caption>
+<colgroup>
+<col width="56%" />
+<col width="44%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Content Type</th>
+<th class="head">Form</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">DW_LNCT_LLVM_source</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">DW_FORM_line_strp</span></tt></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>The source field will contain the UTF-8 encoded, null-terminated source text
+with <tt class="docutils literal"><span class="pre">'\n'</span></tt> line endings. When the source field is present, consumers can use
+the embedded source instead of attempting to discover the source on disk. When
+the source field is absent, consumers can access the file to get the source
+text.</p>
+<p>The above content type appears in the <tt class="docutils literal"><span class="pre">file_name_entry_format</span></tt> field of the
+line table prologue, and its corresponding value appears in the <tt class="docutils literal"><span class="pre">file_names</span></tt>
+field. The current encoding of the content type is documented in table
+<a class="reference internal" href="#amdgpu-dwarf-extended-content-types-encoding"><em>AMDGPU DWARF Line Number Program Header Extended Content Types Encoding</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-dwarf-extended-content-types-encoding">
+<caption>AMDGPU DWARF Line Number Program Header Extended Content Types Encoding</caption>
+<colgroup>
+<col width="58%" />
+<col width="42%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Content Type</th>
+<th class="head">Value</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">DW_LNCT_LLVM_source</span></tt></td>
+<td>0x2001</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+</div>
+<div class="section" id="code-conventions">
+<span id="amdgpu-code-conventions"></span><h2><a class="toc-backref" href="#id61">Code Conventions</a><a class="headerlink" href="#code-conventions" title="Permalink to this headline">¶</a></h2>
+<p>This section provides code conventions used for each supported target triple OS
+(see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</p>
+<div class="section" id="amdhsa">
+<h3><a class="toc-backref" href="#id62">AMDHSA</a><a class="headerlink" href="#amdhsa" title="Permalink to this headline">¶</a></h3>
+<p>This section provides code conventions used when the target triple OS is
+<tt class="docutils literal"><span class="pre">amdhsa</span></tt> (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</p>
+<div class="section" id="code-object-target-identification">
+<span id="amdgpu-amdhsa-code-object-target-identification"></span><h4><a class="toc-backref" href="#id63">Code Object Target Identification</a><a class="headerlink" href="#code-object-target-identification" title="Permalink to this headline">¶</a></h4>
+<p>The AMDHSA OS uses the following syntax to specify the code object
+target as a single string:</p>
+<blockquote>
+<div><tt class="docutils literal"><span class="pre">&lt;Architecture&gt;-&lt;Vendor&gt;-&lt;OS&gt;-&lt;Environment&gt;-&lt;Processor&gt;&lt;Target</span> <span class="pre">Features&gt;</span></tt></div></blockquote>
+<p>Where:</p>
+<blockquote>
+<div><ul class="simple">
+<li><tt class="docutils literal"><span class="pre">&lt;Architecture&gt;</span></tt>, <tt class="docutils literal"><span class="pre">&lt;Vendor&gt;</span></tt>, <tt class="docutils literal"><span class="pre">&lt;OS&gt;</span></tt> and <tt class="docutils literal"><span class="pre">&lt;Environment&gt;</span></tt>
+are the same as the <em>Target Triple</em> (see
+<a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</li>
+<li><tt class="docutils literal"><span class="pre">&lt;Processor&gt;</span></tt> is the same as the <em>Processor</em> (see
+<a class="reference internal" href="#amdgpu-processors"><em>Processors</em></a>).</li>
+<li><tt class="docutils literal"><span class="pre">&lt;Target</span> <span class="pre">Features&gt;</span></tt> is a list of the enabled <em>Target Features</em>
+(see <a class="reference internal" href="#amdgpu-target-features"><em>Target Features</em></a>), each prefixed by a plus, that
+apply to <em>Processor</em>. The list must be in the same order as listed
+in the table <a class="reference internal" href="#amdgpu-target-feature-table"><em>AMDGPU Target Features</em></a>. Note that <em>Target
+Features</em> must be included in the list if they are enabled even if
+that is the default for <em>Processor</em>.</li>
+</ul>
+</div></blockquote>
+<p>For example:</p>
+<blockquote>
+<div><tt class="docutils literal"><span class="pre">"amdgcn-amd-amdhsa--gfx902+xnack"</span></tt></div></blockquote>
+</div>
+<div class="section" id="code-object-metadata">
+<span id="amdgpu-amdhsa-code-object-metadata"></span><h4><a class="toc-backref" href="#id64">Code Object Metadata</a><a class="headerlink" href="#code-object-metadata" title="Permalink to this headline">¶</a></h4>
+<p>The code object metadata specifies extensible metadata associated with the code
+objects executed on HSA <a class="reference internal" href="#hsa">[HSA]</a> compatible runtimes such as AMD’s ROCm
+<a class="reference internal" href="#amd-rocm">[AMD-ROCm]</a>. It is specified by the <tt class="docutils literal"><span class="pre">NT_AMD_AMDGPU_HSA_METADATA</span></tt> note record
+(see <a class="reference internal" href="#amdgpu-note-records"><em>Note Records</em></a>) and is required when the target triple OS is
+<tt class="docutils literal"><span class="pre">amdhsa</span></tt> (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>). It must contain the minimum
+information necessary to support the ROCm kernel queries. For example, the
+segment sizes needed in a dispatch packet. In addition, a high level language
+runtime may require other information to be included. For example, the AMD
+OpenCL runtime records kernel argument information.</p>
+<p>The metadata is specified as a YAML formatted string (see <a class="reference internal" href="#yaml">[YAML]</a> and
+<a class="reference internal" href="YamlIO.html"><em>YAML I/O</em></a>).</p>
+<p>The metadata is represented as a single YAML document comprised of the mapping
+defined in table <a class="reference internal" href="#amdgpu-amdhsa-code-object-metadata-mapping-table"><em>AMDHSA Code Object Metadata Mapping</em></a> and
+referenced tables.</p>
+<p>For boolean values, the string values of <tt class="docutils literal"><span class="pre">false</span></tt> and <tt class="docutils literal"><span class="pre">true</span></tt> are used for
+false and true respectively.</p>
+<p>Additional information can be added to the mappings. To avoid conflicts, any
+non-AMD key names should be prefixed by “<em>vendor-name</em>.”.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-code-object-metadata-mapping-table">
+<caption>AMDHSA Code Object Metadata Mapping</caption>
+<colgroup>
+<col width="11%" />
+<col width="15%" />
+<col width="9%" />
+<col width="65%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">String Key</th>
+<th class="head">Value Type</th>
+<th class="head">Required?</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“Version”</td>
+<td>sequence of
+2 integers</td>
+<td>Required</td>
+<td><ul class="first last simple">
+<li>The first integer is the major
+version. Currently 1.</li>
+<li>The second integer is the minor
+version. Currently 0.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td>“Printf”</td>
+<td>sequence of
+strings</td>
+<td> </td>
+<td><p class="first">Each string is encoded information
+about a printf function call. The
+encoded information is organized as
+fields separated by colon (‘:’):</p>
+<p><tt class="docutils literal"><span class="pre">ID:N:S[0]:S[1]:...:S[N-1]:FormatString</span></tt></p>
+<p>where:</p>
+<dl class="last docutils">
+<dt><tt class="docutils literal"><span class="pre">ID</span></tt></dt>
+<dd>A 32 bit integer as a unique id for
+each printf function call</dd>
+<dt><tt class="docutils literal"><span class="pre">N</span></tt></dt>
+<dd>A 32 bit integer equal to the number
+of arguments of printf function call
+minus 1</dd>
+<dt><tt class="docutils literal"><span class="pre">S[i]</span></tt> (where i = 0, 1, ... , N-1)</dt>
+<dd>32 bit integers for the size in bytes
+of the i-th FormatString argument of
+the printf function call</dd>
+<dt>FormatString</dt>
+<dd>The format string passed to the
+printf function call.</dd>
+</dl>
+</td>
+</tr>
+<tr class="row-even"><td>“Kernels”</td>
+<td>sequence of
+mapping</td>
+<td>Required</td>
+<td>Sequence of the mappings for each
+kernel in the code object. See
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-kernel-metadata-mapping-table"><em>AMDHSA Code Object Kernel Metadata Mapping</em></a>
+for the definition of the mapping.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-code-object-kernel-metadata-mapping-table">
+<caption>AMDHSA Code Object Kernel Metadata Mapping</caption>
+<colgroup>
+<col width="14%" />
+<col width="12%" />
+<col width="8%" />
+<col width="66%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">String Key</th>
+<th class="head">Value Type</th>
+<th class="head">Required?</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“Name”</td>
+<td>string</td>
+<td>Required</td>
+<td>Source name of the kernel.</td>
+</tr>
+<tr class="row-odd"><td>“SymbolName”</td>
+<td>string</td>
+<td>Required</td>
+<td>Name of the kernel
+descriptor ELF symbol.</td>
+</tr>
+<tr class="row-even"><td>“Language”</td>
+<td>string</td>
+<td> </td>
+<td><p class="first">Source language of the kernel.
+Values include:</p>
+<ul class="last simple">
+<li>“OpenCL C”</li>
+<li>“OpenCL C++”</li>
+<li>“HCC”</li>
+<li>“OpenMP”</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td>“LanguageVersion”</td>
+<td>sequence of
+2 integers</td>
+<td> </td>
+<td><ul class="first last simple">
+<li>The first integer is the major
+version.</li>
+<li>The second integer is the
+minor version.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>“Attrs”</td>
+<td>mapping</td>
+<td> </td>
+<td>Mapping of kernel attributes.
+See
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table"><em>AMDHSA Code Object Kernel Attribute Metadata Mapping</em></a>
+for the mapping definition.</td>
+</tr>
+<tr class="row-odd"><td>“Args”</td>
+<td>sequence of
+mapping</td>
+<td> </td>
+<td>Sequence of mappings of the
+kernel arguments. See
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table"><em>AMDHSA Code Object Kernel Argument Metadata Mapping</em></a>
+for the definition of the mapping.</td>
+</tr>
+<tr class="row-even"><td>“CodeProps”</td>
+<td>mapping</td>
+<td> </td>
+<td>Mapping of properties related to
+the kernel code. See
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table"><em>AMDHSA Code Object Kernel Code Properties Metadata Mapping</em></a>
+for the mapping definition.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-code-object-kernel-attribute-metadata-mapping-table">
+<caption>AMDHSA Code Object Kernel Attribute Metadata Mapping</caption>
+<colgroup>
+<col width="26%" />
+<col width="19%" />
+<col width="13%" />
+<col width="42%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">String Key</th>
+<th class="head">Value Type</th>
+<th class="head">Required?</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“ReqdWorkGroupSize”</td>
+<td>sequence of
+3 integers</td>
+<td> </td>
+<td><p class="first">If not 0, 0, 0 then all values
+must be >=1 and the dispatch
+work-group size X, Y, Z must
+correspond to the specified
+values. Defaults to 0, 0, 0.</p>
+<p class="last">Corresponds to the OpenCL
+<tt class="docutils literal"><span class="pre">reqd_work_group_size</span></tt>
+attribute.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>“WorkGroupSizeHint”</td>
+<td>sequence of
+3 integers</td>
+<td> </td>
+<td><p class="first">The dispatch work-group size
+X, Y, Z is likely to be the
+specified values.</p>
+<p class="last">Corresponds to the OpenCL
+<tt class="docutils literal"><span class="pre">work_group_size_hint</span></tt>
+attribute.</p>
+</td>
+</tr>
+<tr class="row-even"><td>“VecTypeHint”</td>
+<td>string</td>
+<td> </td>
+<td><p class="first">The name of a scalar or vector
+type.</p>
+<p class="last">Corresponds to the OpenCL
+<tt class="docutils literal"><span class="pre">vec_type_hint</span></tt> attribute.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>“RuntimeHandle”</td>
+<td>string</td>
+<td> </td>
+<td>The external symbol name
+associated with a kernel.
+OpenCL runtime allocates a
+global buffer for the symbol
+and saves the kernel’s address
+to it, which is used for
+device side enqueueing. Only
+available for device side
+enqueued kernels.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-code-object-kernel-argument-metadata-mapping-table">
+<caption>AMDHSA Code Object Kernel Argument Metadata Mapping</caption>
+<colgroup>
+<col width="22%" />
+<col width="18%" />
+<col width="12%" />
+<col width="49%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">String Key</th>
+<th class="head">Value Type</th>
+<th class="head">Required?</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“Name”</td>
+<td>string</td>
+<td> </td>
+<td>Kernel argument name.</td>
+</tr>
+<tr class="row-odd"><td>“TypeName”</td>
+<td>string</td>
+<td> </td>
+<td>Kernel argument type name.</td>
+</tr>
+<tr class="row-even"><td>“Size”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Kernel argument size in bytes.</td>
+</tr>
+<tr class="row-odd"><td>“Align”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Kernel argument alignment in
+bytes. Must be a power of two.</td>
+</tr>
+<tr class="row-even"><td>“ValueKind”</td>
+<td>string</td>
+<td>Required</td>
+<td><p class="first">Kernel argument kind that
+specifies how to set up the
+corresponding argument.
+Values include:</p>
+<dl class="last docutils">
+<dt>“ByValue”</dt>
+<dd>The argument is copied
+directly into the kernarg.</dd>
+<dt>“GlobalBuffer”</dt>
+<dd>A global address space pointer
+to the buffer data is passed
+in the kernarg.</dd>
+<dt>“DynamicSharedPointer”</dt>
+<dd>A group address space pointer
+to dynamically allocated LDS
+is passed in the kernarg.</dd>
+<dt>“Sampler”</dt>
+<dd>A global address space
+pointer to a S# is passed in
+the kernarg.</dd>
+<dt>“Image”</dt>
+<dd>A global address space
+pointer to a T# is passed in
+the kernarg.</dd>
+<dt>“Pipe”</dt>
+<dd>A global address space pointer
+to an OpenCL pipe is passed in
+the kernarg.</dd>
+<dt>“Queue”</dt>
+<dd>A global address space pointer
+to an OpenCL device enqueue
+queue is passed in the
+kernarg.</dd>
+<dt>“HiddenGlobalOffsetX”</dt>
+<dd>The OpenCL grid dispatch
+global offset for the X
+dimension is passed in the
+kernarg.</dd>
+<dt>“HiddenGlobalOffsetY”</dt>
+<dd>The OpenCL grid dispatch
+global offset for the Y
+dimension is passed in the
+kernarg.</dd>
+<dt>“HiddenGlobalOffsetZ”</dt>
+<dd>The OpenCL grid dispatch
+global offset for the Z
+dimension is passed in the
+kernarg.</dd>
+<dt>“HiddenNone”</dt>
+<dd>An argument that is not used
+by the kernel. Space needs to
+be left for it, but it does
+not need to be set up.</dd>
+<dt>“HiddenPrintfBuffer”</dt>
+<dd>A global address space pointer
+to the runtime printf buffer
+is passed in kernarg.</dd>
+<dt>“HiddenDefaultQueue”</dt>
+<dd>A global address space pointer
+to the OpenCL device enqueue
+queue that should be used by
+the kernel by default is
+passed in the kernarg.</dd>
+<dt>“HiddenCompletionAction”</dt>
+<dd>A global address space pointer
+to help link enqueued kernels into
+the ancestor tree for determining
+when the parent kernel has finished.</dd>
+</dl>
+</td>
+</tr>
+<tr class="row-odd"><td>“ValueType”</td>
+<td>string</td>
+<td>Required</td>
+<td><p class="first">Kernel argument value type. Only
+present if “ValueKind” is
+“ByValue”. For vector data
+types, the value is for the
+element type. Values include:</p>
+<ul class="last simple">
+<li>“Struct”</li>
+<li>“I8”</li>
+<li>“U8”</li>
+<li>“I16”</li>
+<li>“U16”</li>
+<li>“F16”</li>
+<li>“I32”</li>
+<li>“U32”</li>
+<li>“F32”</li>
+<li>“I64”</li>
+<li>“U64”</li>
+<li>“F64”</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>“PointeeAlign”</td>
+<td>integer</td>
+<td> </td>
+<td>Alignment in bytes of pointee
+type for pointer type kernel
+argument. Must be a power
+of 2. Only present if
+“ValueKind” is
+“DynamicSharedPointer”.</td>
+</tr>
+<tr class="row-odd"><td>“AddrSpaceQual”</td>
+<td>string</td>
+<td> </td>
+<td><p class="first">Kernel argument address space
+qualifier. Only present if
+“ValueKind” is “GlobalBuffer” or
+“DynamicSharedPointer”. Values
+are:</p>
+<ul class="last simple">
+<li>“Private”</li>
+<li>“Global”</li>
+<li>“Constant”</li>
+<li>“Local”</li>
+<li>“Generic”</li>
+<li>“Region”</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>“AccQual”</td>
+<td>string</td>
+<td> </td>
+<td><p class="first">Kernel argument access
+qualifier. Only present if
+“ValueKind” is “Image” or
+“Pipe”. Values
+are:</p>
+<ul class="last simple">
+<li>“ReadOnly”</li>
+<li>“WriteOnly”</li>
+<li>“ReadWrite”</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td>“ActualAccQual”</td>
+<td>string</td>
+<td> </td>
+<td><p class="first">The actual memory accesses
+performed by the kernel on the
+kernel argument. Only present if
+“ValueKind” is “GlobalBuffer”,
+“Image”, or “Pipe”. This may be
+more restrictive than indicated
+by “AccQual” to reflect what the
+kernel actually does. If not
+present then the runtime must
+assume what is implied by
+“AccQual” and “IsConst”. Values
+are:</p>
+<ul class="last simple">
+<li>“ReadOnly”</li>
+<li>“WriteOnly”</li>
+<li>“ReadWrite”</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>“IsConst”</td>
+<td>boolean</td>
+<td> </td>
+<td>Indicates if the kernel argument
+is const qualified. Only present
+if “ValueKind” is
+“GlobalBuffer”.</td>
+</tr>
+<tr class="row-odd"><td>“IsRestrict”</td>
+<td>boolean</td>
+<td> </td>
+<td>Indicates if the kernel argument
+is restrict qualified. Only
+present if “ValueKind” is
+“GlobalBuffer”.</td>
+</tr>
+<tr class="row-even"><td>“IsVolatile”</td>
+<td>boolean</td>
+<td> </td>
+<td>Indicates if the kernel argument
+is volatile qualified. Only
+present if “ValueKind” is
+“GlobalBuffer”.</td>
+</tr>
+<tr class="row-odd"><td>“IsPipe”</td>
+<td>boolean</td>
+<td> </td>
+<td>Indicates if the kernel argument
+is pipe qualified. Only present
+if “ValueKind” is “Pipe”.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-code-object-kernel-code-properties-metadata-mapping-table">
+<caption>AMDHSA Code Object Kernel Code Properties Metadata Mapping</caption>
+<colgroup>
+<col width="39%" />
+<col width="19%" />
+<col width="13%" />
+<col width="29%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">String Key</th>
+<th class="head">Value Type</th>
+<th class="head">Required?</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>“KernargSegmentSize”</td>
+<td>integer</td>
+<td>Required</td>
+<td>The size in bytes of
+the kernarg segment
+that holds the values
+of the arguments to
+the kernel.</td>
+</tr>
+<tr class="row-odd"><td>“GroupSegmentFixedSize”</td>
+<td>integer</td>
+<td>Required</td>
+<td>The amount of group
+segment memory
+required by a
+work-group in
+bytes. This does not
+include any
+dynamically allocated
+group segment memory
+that may be added
+when the kernel is
+dispatched.</td>
+</tr>
+<tr class="row-even"><td>“PrivateSegmentFixedSize”</td>
+<td>integer</td>
+<td>Required</td>
+<td>The amount of fixed
+private address space
+memory required for a
+work-item in
+bytes. If the kernel
+uses a dynamic call
+stack then additional
+space must be added
+to this value for the
+call stack.</td>
+</tr>
+<tr class="row-odd"><td>“KernargSegmentAlign”</td>
+<td>integer</td>
+<td>Required</td>
+<td>The maximum byte
+alignment of
+arguments in the
+kernarg segment. Must
+be a power of 2.</td>
+</tr>
+<tr class="row-even"><td>“WavefrontSize”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Wavefront size. Must
+be a power of 2.</td>
+</tr>
+<tr class="row-odd"><td>“NumSGPRs”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Number of scalar
+registers used by a
+wavefront for
+GFX6-GFX9. This
+includes the special
+SGPRs for VCC, Flat
+Scratch (GFX7-GFX9)
+and XNACK (for
+GFX8-GFX9). It does
+not include the 16
+SGPR added if a trap
+handler is
+enabled. It is not
+rounded up to the
+allocation
+granularity.</td>
+</tr>
+<tr class="row-even"><td>“NumVGPRs”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Number of vector
+registers used by
+each work-item for
+GFX6-GFX9.</td>
+</tr>
+<tr class="row-odd"><td>“MaxFlatWorkGroupSize”</td>
+<td>integer</td>
+<td>Required</td>
+<td>Maximum flat
+work-group size
+supported by the
+kernel in work-items.
+Must be >=1 and
+consistent with
+ReqdWorkGroupSize if
+not 0, 0, 0.</td>
+</tr>
+<tr class="row-even"><td>“NumSpilledSGPRs”</td>
+<td>integer</td>
+<td> </td>
+<td>Number of stores from
+a scalar register to
+a register allocator
+created spill
+location.</td>
+</tr>
+<tr class="row-odd"><td>“NumSpilledVGPRs”</td>
+<td>integer</td>
+<td> </td>
+<td>Number of stores from
+a vector register to
+a register allocator
+created spill
+location.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="kernel-dispatch">
+<h4><a class="toc-backref" href="#id65">Kernel Dispatch</a><a class="headerlink" href="#kernel-dispatch" title="Permalink to this headline">¶</a></h4>
+<p>The HSA architected queuing language (AQL) defines a user space memory interface
+that can be used to control the dispatch of kernels, in an agent independent
+way. An agent can have zero or more AQL queues created for it using the ROCm
+runtime, in which AQL packets (all of which are 64 bytes) can be placed. See the
+<em>HSA Platform System Architecture Specification</em> <a class="reference internal" href="#hsa">[HSA]</a> for the AQL queue
+mechanics and packet layouts.</p>
+<p>The packet processor of a kernel agent is responsible for detecting and
+dispatching HSA kernels from the AQL queues associated with it. For AMD GPUs the
+packet processor is implemented by the hardware command processor (CP),
+asynchronous dispatch controller (ADC) and shader processor input controller
+(SPI).</p>
+<p>The ROCm runtime can be used to allocate an AQL queue object. It uses the kernel
+mode driver to initialize and register the AQL queue with CP.</p>
+<p>To dispatch a kernel the following actions are performed. This can occur in the
+CPU host program, or from an HSA kernel executing on a GPU.</p>
+<ol class="arabic simple">
+<li>A pointer to an AQL queue for the kernel agent on which the kernel is to be
+executed is obtained.</li>
+<li>A pointer to the kernel descriptor (see
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>) of the kernel to execute is
+obtained. It must be for a kernel that is contained in a code object that
+was loaded by the ROCm runtime on the kernel agent with which the AQL queue is
+associated.</li>
+<li>Space is allocated for the kernel arguments using the ROCm runtime allocator
+for a memory region with the kernarg property for the kernel agent that will
+execute the kernel. It must be at least 16 byte aligned.</li>
+<li>Kernel argument values are assigned to the kernel argument memory
+allocation. The layout is defined in the <em>HSA Programmer’s Language Reference</em>
+<a class="reference internal" href="#hsa">[HSA]</a>. For AMDGPU the kernel execution directly accesses the kernel argument
+memory in the same way constant memory is accessed. (Note that the HSA
+specification allows an implementation to copy the kernel argument contents to
+another location that is accessed by the kernel.)</li>
+<li>An AQL kernel dispatch packet is created on the AQL queue. The ROCm runtime
+API uses 64 bit atomic operations to reserve space in the AQL queue for the
+packet. The packet must be set up, and the final write must use an atomic
+store release to set the packet kind to ensure the packet contents are
+visible to the kernel agent. AQL defines a doorbell signal mechanism to
+notify the kernel agent that the AQL queue has been updated. These rules, and
+the layout of the AQL queue and kernel dispatch packet are defined in the <em>HSA
+System Architecture Specification</em> <a class="reference internal" href="#hsa">[HSA]</a>.</li>
+<li>A kernel dispatch packet includes information about the actual dispatch,
+such as grid and work-group size, together with information from the code
+object about the kernel, such as segment sizes. The ROCm runtime queries on
+the kernel symbol can be used to obtain the code object values which are
+recorded in the <a class="reference internal" href="#amdgpu-amdhsa-code-object-metadata"><em>Code Object Metadata</em></a>.</li>
+<li>CP executes micro-code and is responsible for detecting and setting up the
+GPU to execute the wavefronts of a kernel dispatch.</li>
+<li>CP ensures that when a wavefront starts executing the kernel machine
+code, the scalar general purpose registers (SGPR) and vector general purpose
+registers (VGPR) are set up as required by the machine code. The required
+setup is defined in the <a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>. The initial
+register state is defined in
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>.</li>
+<li>The prolog of the kernel machine code (see
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-prolog"><em>Kernel Prolog</em></a>) sets up the machine state as necessary
+before continuing executing the machine code that corresponds to the kernel.</li>
+<li>When the kernel dispatch has completed execution, CP signals the completion
+signal specified in the kernel dispatch packet if not 0.</li>
+</ol>
+</div>
+<div class="section" id="memory-spaces">
+<span id="amdgpu-amdhsa-memory-spaces"></span><h4><a class="toc-backref" href="#id66">Memory Spaces</a><a class="headerlink" href="#memory-spaces" title="Permalink to this headline">¶</a></h4>
+<p>The memory space properties are:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-memory-spaces-table">
+<caption>AMDHSA Memory Spaces</caption>
+<colgroup>
+<col width="28%" />
+<col width="18%" />
+<col width="13%" />
+<col width="11%" />
+<col width="30%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Memory Space Name</th>
+<th class="head">HSA Segment
+Name</th>
+<th class="head">Hardware
+Name</th>
+<th class="head">Address
+Size</th>
+<th class="head">NULL Value</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>Private</td>
+<td>private</td>
+<td>scratch</td>
+<td>32</td>
+<td>0x00000000</td>
+</tr>
+<tr class="row-odd"><td>Local</td>
+<td>group</td>
+<td>LDS</td>
+<td>32</td>
+<td>0xFFFFFFFF</td>
+</tr>
+<tr class="row-even"><td>Global</td>
+<td>global</td>
+<td>global</td>
+<td>64</td>
+<td>0x0000000000000000</td>
+</tr>
+<tr class="row-odd"><td>Constant</td>
+<td>constant</td>
+<td><em>same as
+global</em></td>
+<td>64</td>
+<td>0x0000000000000000</td>
+</tr>
+<tr class="row-even"><td>Generic</td>
+<td>flat</td>
+<td>flat</td>
+<td>64</td>
+<td>0x0000000000000000</td>
+</tr>
+<tr class="row-odd"><td>Region</td>
+<td>N/A</td>
+<td>GDS</td>
+<td>32</td>
+<td><em>not implemented
+for AMDHSA</em></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>The global and constant memory spaces both use global virtual addresses, which
+are the same virtual address space used by the CPU. However, some virtual
+addresses may only be accessible to the CPU, some only accessible by the GPU,
+and some by both.</p>
+<p>Using the constant memory space indicates that the data will not change during
+the execution of the kernel. This allows scalar read instructions to be
+used. The vector and scalar L1 caches are invalidated of volatile data before
+each kernel dispatch execution to allow constant memory to change values between
+kernel dispatches.</p>
+<p>The local memory space uses the hardware Local Data Store (LDS) which is
+automatically allocated when the hardware creates work-groups of wavefronts, and
+freed when all the wavefronts of a work-group have terminated. The data store
+(DS) instructions can be used to access it.</p>
+<p>The private memory space uses the hardware scratch memory support. If the kernel
+uses scratch, then the hardware allocates memory that is accessed using
+wavefront lane dword (4 byte) interleaving. The mapping used from private
+address to physical address is:</p>
+<blockquote>
+<div><tt class="docutils literal"><span class="pre">wavefront-scratch-base</span> <span class="pre">+</span>
+<span class="pre">(private-address</span> <span class="pre">*</span> <span class="pre">wavefront-size</span> <span class="pre">*</span> <span class="pre">4)</span> <span class="pre">+</span>
+<span class="pre">(wavefront-lane-id</span> <span class="pre">*</span> <span class="pre">4)</span></tt></div></blockquote>
+<p>There are different ways that the wavefront scratch base address is determined
+by a wavefront (see <a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>). This
+memory can be accessed in an interleaved manner using buffer instruction with
+the scratch buffer descriptor and per wavefront scratch offset, by the scratch
+instructions, or by flat instructions. If each lane of a wavefront accesses the
+same private address, the interleaving results in adjacent dwords being accessed
+and hence requires fewer cache lines to be fetched. Multi-dword access is not
+supported except by flat and scratch instructions in GFX9.</p>
+<p>The generic address space uses the hardware flat address support available in
+GFX7-GFX9. This uses two fixed ranges of virtual addresses (the private and
+local apertures), that are outside the range of addressable global memory, to
+map from a flat address to a private or local address.</p>
+<p>FLAT instructions can take a flat address and access global, private (scratch)
+and group (LDS) memory depending on whether the address is within one of the
+aperture ranges. Flat access to scratch requires hardware aperture setup and
+setup in the kernel prologue (see <a class="reference internal" href="#amdgpu-amdhsa-flat-scratch"><em>Flat Scratch</em></a>). Flat
+access to LDS requires hardware aperture setup and M0 (GFX7-GFX8) register setup
+(see <a class="reference internal" href="#amdgpu-amdhsa-m0"><em>M0</em></a>).</p>
+<p>To convert between a segment address and a flat address the base addresses of the
+apertures can be used. For GFX7-GFX8 these are available in the
+<a class="reference internal" href="#amdgpu-amdhsa-hsa-aql-queue"><em>HSA AQL Queue</em></a> the address of which can be obtained with
+Queue Ptr SGPR (see <a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>). For
+GFX9 the aperture base addresses are directly available as inline constant
+registers <tt class="docutils literal"><span class="pre">SRC_SHARED_BASE/LIMIT</span></tt> and <tt class="docutils literal"><span class="pre">SRC_PRIVATE_BASE/LIMIT</span></tt>. In 64 bit
+address mode the aperture sizes are 2^32 bytes and the base is aligned to 2^32
+which makes it easier to convert from flat to segment or segment to flat.</p>
+</div>
+<div class="section" id="image-and-samplers">
+<h4><a class="toc-backref" href="#id67">Image and Samplers</a><a class="headerlink" href="#image-and-samplers" title="Permalink to this headline">¶</a></h4>
+<p>Image and sampler handles created by the ROCm runtime are 64 bit addresses of a
+hardware 32 byte V# and 48 byte S# object respectively. In order to support the
+HSA <tt class="docutils literal"><span class="pre">query_sampler</span></tt> operations two extra dwords are used to store the HSA BRIG
+enumeration values for the queries that are not trivially deducible from the S#
+representation.</p>
+</div>
+<div class="section" id="hsa-signals">
+<h4><a class="toc-backref" href="#id68">HSA Signals</a><a class="headerlink" href="#hsa-signals" title="Permalink to this headline">¶</a></h4>
+<p>HSA signal handles created by the ROCm runtime are 64 bit addresses of a
+structure allocated in memory accessible from both the CPU and GPU. The
+structure is defined by the ROCm runtime and subject to change between releases
+(see <a class="reference internal" href="#amd-rocm-github">[AMD-ROCm-github]</a>).</p>
+</div>
+<div class="section" id="hsa-aql-queue">
+<span id="amdgpu-amdhsa-hsa-aql-queue"></span><h4><a class="toc-backref" href="#id69">HSA AQL Queue</a><a class="headerlink" href="#hsa-aql-queue" title="Permalink to this headline">¶</a></h4>
+<p>The HSA AQL queue structure is defined by the ROCm runtime and subject to change
+between releases (see <a class="reference internal" href="#amd-rocm-github">[AMD-ROCm-github]</a>). For some processors it contains
+fields needed to implement certain language features such as the flat address
+aperture bases. It also contains fields used by CP such as managing the
+allocation of scratch memory.</p>
+</div>
+<div class="section" id="kernel-descriptor">
+<span id="amdgpu-amdhsa-kernel-descriptor"></span><h4><a class="toc-backref" href="#id70">Kernel Descriptor</a><a class="headerlink" href="#kernel-descriptor" title="Permalink to this headline">¶</a></h4>
+<p>A kernel descriptor consists of the information needed by CP to initiate the
+execution of a kernel, including the entry point address of the machine code
+that implements the kernel.</p>
+<div class="section" id="kernel-descriptor-for-gfx6-gfx9">
+<h5><a class="toc-backref" href="#id71">Kernel Descriptor for GFX6-GFX9</a><a class="headerlink" href="#kernel-descriptor-for-gfx6-gfx9" title="Permalink to this headline">¶</a></h5>
+<p>CP microcode requires the Kernel descriptor to be allocated on 64 byte
+alignment.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table">
+<caption>Kernel Descriptor for GFX6-GFX9</caption>
+<colgroup>
+<col width="7%" />
+<col width="7%" />
+<col width="31%" />
+<col width="55%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Bits</th>
+<th class="head">Size</th>
+<th class="head">Field Name</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>31:0</td>
+<td>4 bytes</td>
+<td>GROUP_SEGMENT_FIXED_SIZE</td>
+<td>The amount of fixed local
+address space memory
+required for a work-group
+in bytes. This does not
+include any dynamically
+allocated local address
+space memory that may be
+added when the kernel is
+dispatched.</td>
+</tr>
+<tr class="row-odd"><td>63:32</td>
+<td>4 bytes</td>
+<td>PRIVATE_SEGMENT_FIXED_SIZE</td>
+<td>The amount of fixed
+private address space
+memory required for a
+work-item in bytes. If
+is_dynamic_callstack is 1
+then additional space must
+be added to this value for
+the call stack.</td>
+</tr>
+<tr class="row-even"><td>127:64</td>
+<td>8 bytes</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-odd"><td>191:128</td>
+<td>8 bytes</td>
+<td>KERNEL_CODE_ENTRY_BYTE_OFFSET</td>
+<td>Byte offset (possibly
+negative) from base
+address of kernel
+descriptor to kernel’s
+entry point instruction
+which must be 256 byte
+aligned.</td>
+</tr>
+<tr class="row-even"><td>383:192</td>
+<td>24
+bytes</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-odd"><td>415:384</td>
+<td>4 bytes</td>
+<td>COMPUTE_PGM_RSRC1</td>
+<td>Compute Shader (CS)
+program settings used by
+CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1</span></tt>
+configuration
+register. See
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td>447:416</td>
+<td>4 bytes</td>
+<td>COMPUTE_PGM_RSRC2</td>
+<td>Compute Shader (CS)
+program settings used by
+CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2</span></tt>
+configuration
+register. See
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td>448</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_PRIVATE_SEGMENT
+_BUFFER</td>
+<td><p class="first">Enable the setup of the
+SGPR user data registers
+(see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">The total number of SGPR
+user data registers
+requested must not exceed
+16 and must match the value in
+<tt class="docutils literal"><span class="pre">compute_pgm_rsrc2.user_sgpr.user_sgpr_count</span></tt>.
+Any requests beyond 16
+will be ignored.</p>
+</td>
+</tr>
+<tr class="row-even"><td>449</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_DISPATCH_PTR</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-odd"><td>450</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_QUEUE_PTR</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-even"><td>451</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_KERNARG_SEGMENT_PTR</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-odd"><td>452</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_DISPATCH_ID</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-even"><td>453</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_FLAT_SCRATCH_INIT</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-odd"><td>454</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_PRIVATE_SEGMENT
+_SIZE</td>
+<td><em>see above</em></td>
+</tr>
+<tr class="row-even"><td>455</td>
+<td>1 bit</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-odd"><td>511:456</td>
+<td>7 bytes</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-even"><td>512</td>
+<td colspan="3"><strong>Total size 64 bytes.</strong></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table">
+<caption>compute_pgm_rsrc1 for GFX6-GFX9</caption>
+<colgroup>
+<col width="6%" />
+<col width="6%" />
+<col width="26%" />
+<col width="63%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Bits</th>
+<th class="head">Size</th>
+<th class="head">Field Name</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>5:0</td>
+<td>6 bits</td>
+<td>GRANULATED_WORKITEM_VGPR_COUNT</td>
+<td><p class="first">Number of vector register
+blocks used by each work-item;
+granularity is device
+specific:</p>
+<dl class="docutils">
+<dt>GFX6-GFX9</dt>
+<dd><ul class="first last simple">
+<li>vgprs_used 0..256</li>
+<li>max(0, ceil(vgprs_used / 4) - 1)</li>
+</ul>
+</dd>
+</dl>
+<p>Where vgprs_used is defined
+as the highest VGPR number
+explicitly referenced plus
+one.</p>
+<p>Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.VGPRS</span></tt>.</p>
+<p class="last">The
+<a class="reference internal" href="#amdgpu-assembler"><em>Assembler</em></a>
+calculates this
+automatically for the
+selected processor from
+values provided to the
+<cite>.amdhsa_kernel</cite> directive
+by the
+<cite>.amdhsa_next_free_vgpr</cite>
+nested directive (see
+<a class="reference internal" href="#amdhsa-kernel-directives-table"><em>AMDHSA Kernel Assembler Directives</em></a>).</p>
+</td>
+</tr>
+<tr class="row-odd"><td>9:6</td>
+<td>4 bits</td>
+<td>GRANULATED_WAVEFRONT_SGPR_COUNT</td>
+<td><p class="first">Number of scalar register
+blocks used by a wavefront;
+granularity is device
+specific:</p>
+<dl class="docutils">
+<dt>GFX6-GFX8</dt>
+<dd><ul class="first last simple">
+<li>sgprs_used 0..112</li>
+<li>max(0, ceil(sgprs_used / 8) - 1)</li>
+</ul>
+</dd>
+<dt>GFX9</dt>
+<dd><ul class="first last simple">
+<li>sgprs_used 0..112</li>
+<li>2 * max(0, ceil(sgprs_used / 16) - 1)</li>
+</ul>
+</dd>
+</dl>
+<p>Where sgprs_used is
+defined as the highest
+SGPR number explicitly
+referenced plus one, plus
+a target-specific number
+of additional special
+SGPRs for VCC,
+FLAT_SCRATCH (GFX7+) and
+XNACK_MASK (GFX8+), and
+any additional
+target-specific
+limitations. It does not
+include the 16 SGPRs added
+if a trap handler is
+enabled.</p>
+<p>The target-specific
+limitations and special
+SGPR layout are defined in
+the hardware
+documentation, which can
+be found in the
+<a class="reference internal" href="#amdgpu-processors"><em>Processors</em></a>
+table.</p>
+<p>Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.SGPRS</span></tt>.</p>
+<p class="last">The
+<a class="reference internal" href="#amdgpu-assembler"><em>Assembler</em></a>
+calculates this
+automatically for the
+selected processor from
+values provided to the
+<cite>.amdhsa_kernel</cite> directive
+by the
+<cite>.amdhsa_next_free_sgpr</cite>
+and <cite>.amdhsa_reserve_*</cite>
+nested directives (see
+<a class="reference internal" href="#amdhsa-kernel-directives-table"><em>AMDHSA Kernel Assembler Directives</em></a>).</p>
+</td>
+</tr>
+<tr class="row-even"><td>11:10</td>
+<td>2 bits</td>
+<td>PRIORITY</td>
+<td><p class="first">Must be 0.</p>
+<p>Start executing wavefront
+at the specified priority.</p>
+<p class="last">CP is responsible for
+filling in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.PRIORITY</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>13:12</td>
+<td>2 bits</td>
+<td>FLOAT_ROUND_MODE_32</td>
+<td><p class="first">Wavefront starts execution
+with specified rounding
+mode for single (32
+bit) floating point
+precision floating point
+operations.</p>
+<p>Floating point rounding
+mode values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table"><em>Floating Point Rounding Mode Enumeration Values</em></a>.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.FLOAT_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>15:14</td>
+<td>2 bits</td>
+<td>FLOAT_ROUND_MODE_16_64</td>
+<td><p class="first">Wavefront starts execution
+with specified rounding
+mode for half/double (16
+and 64 bit) floating point
+precision floating point
+operations.</p>
+<p>Floating point rounding
+mode values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table"><em>Floating Point Rounding Mode Enumeration Values</em></a>.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.FLOAT_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>17:16</td>
+<td>2 bits</td>
+<td>FLOAT_DENORM_MODE_32</td>
+<td><p class="first">Wavefront starts execution
+with specified denorm mode
+for single (32
+bit) floating point
+precision floating point
+operations.</p>
+<p>Floating point denorm mode
+values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table"><em>Floating Point Denorm Mode Enumeration Values</em></a>.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.FLOAT_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>19:18</td>
+<td>2 bits</td>
+<td>FLOAT_DENORM_MODE_16_64</td>
+<td><p class="first">Wavefront starts execution
+with specified denorm mode
+for half/double (16
+and 64 bit) floating point
+precision floating point
+operations.</p>
+<p>Floating point denorm mode
+values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table"><em>Floating Point Denorm Mode Enumeration Values</em></a>.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.FLOAT_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>20</td>
+<td>1 bit</td>
+<td>PRIV</td>
+<td><p class="first">Must be 0.</p>
+<p>Start executing wavefront
+in privilege trap handler
+mode.</p>
+<p class="last">CP is responsible for
+filling in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.PRIV</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>21</td>
+<td>1 bit</td>
+<td>ENABLE_DX10_CLAMP</td>
+<td><p class="first">Wavefront starts execution
+with DX10 clamp mode
+enabled. Used by the vector
+ALU to force DX10 style
+treatment of NaN’s (when
+set, clamp NaN to zero,
+otherwise pass NaN
+through).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.DX10_CLAMP</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>22</td>
+<td>1 bit</td>
+<td>DEBUG_MODE</td>
+<td><p class="first">Must be 0.</p>
+<p>Start executing wavefront
+in single step mode.</p>
+<p class="last">CP is responsible for
+filling in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.DEBUG_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>23</td>
+<td>1 bit</td>
+<td>ENABLE_IEEE_MODE</td>
+<td><p class="first">Wavefront starts execution
+with IEEE mode
+enabled. Floating point
+opcodes that support
+exception flag gathering
+will quiet and propagate
+signaling-NaN inputs per
+IEEE 754-2008. Min_dx10 and
+max_dx10 become IEEE
+754-2008 compliant due to
+signaling-NaN propagation
+and quieting.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.IEEE_MODE</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>24</td>
+<td>1 bit</td>
+<td>BULKY</td>
+<td><p class="first">Must be 0.</p>
+<p>Only one work-group allowed
+to execute on a compute
+unit.</p>
+<p class="last">CP is responsible for
+filling in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.BULKY</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>25</td>
+<td>1 bit</td>
+<td>CDBG_USER</td>
+<td><p class="first">Must be 0.</p>
+<p>Flag that can be used to
+control debugging code.</p>
+<p class="last">CP is responsible for
+filling in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.CDBG_USER</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>26</td>
+<td>1 bit</td>
+<td>FP16_OVFL</td>
+<td><dl class="first last docutils">
+<dt>GFX6-GFX8</dt>
+<dd>Reserved, must be 0.</dd>
+<dt>GFX9</dt>
+<dd><p class="first">Wavefront starts execution
+with specified fp16 overflow
+mode.</p>
+<ul class="simple">
+<li>If 0, fp16 overflow generates
++/-INF values.</li>
+<li>If 1, fp16 overflow that is the
+result of an +/-INF input value
+or divide by 0 produces a +/-INF,
+otherwise clamps computed
+overflow to +/-MAX_FP16 as
+appropriate.</li>
+</ul>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC1.FP16_OVFL</span></tt>.</p>
+</dd>
+</dl>
+</td>
+</tr>
+<tr class="row-even"><td>31:27</td>
+<td>5 bits</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-odd"><td>32</td>
+<td colspan="3"><strong>Total size 4 bytes</strong></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table">
+<caption>compute_pgm_rsrc2 for GFX6-GFX9</caption>
+<colgroup>
+<col width="6%" />
+<col width="6%" />
+<col width="26%" />
+<col width="63%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Bits</th>
+<th class="head">Size</th>
+<th class="head">Field Name</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>0</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_PRIVATE_SEGMENT
+_WAVEFRONT_OFFSET</td>
+<td><p class="first">Enable the setup of the
+SGPR wavefront scratch offset
+system register (see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.SCRATCH_EN</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>5:1</td>
+<td>5 bits</td>
+<td>USER_SGPR_COUNT</td>
+<td><p class="first">The total number of SGPR
+user data registers
+requested. This number must
+match the number of user
+data registers enabled.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.USER_SGPR</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>6</td>
+<td>1 bit</td>
+<td>ENABLE_TRAP_HANDLER</td>
+<td><p class="first">Must be 0.</p>
+<p class="last">This bit represents
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TRAP_PRESENT</span></tt>,
+which is set by the CP if
+the runtime has installed a
+trap handler.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>7</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_WORKGROUP_ID_X</td>
+<td><p class="first">Enable the setup of the
+system SGPR register for
+the work-group id in the X
+dimension (see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TGID_X_EN</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>8</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_WORKGROUP_ID_Y</td>
+<td><p class="first">Enable the setup of the
+system SGPR register for
+the work-group id in the Y
+dimension (see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TGID_Y_EN</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>9</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_WORKGROUP_ID_Z</td>
+<td><p class="first">Enable the setup of the
+system SGPR register for
+the work-group id in the Z
+dimension (see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TGID_Z_EN</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>10</td>
+<td>1 bit</td>
+<td>ENABLE_SGPR_WORKGROUP_INFO</td>
+<td><p class="first">Enable the setup of the
+system SGPR register for
+work-group information (see
+<a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>).</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TGID_SIZE_EN</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>12:11</td>
+<td>2 bits</td>
+<td>ENABLE_VGPR_WORKITEM_ID</td>
+<td><p class="first">Enable the setup of the
+VGPR system registers used
+for the work-item ID.
+<a class="reference internal" href="#amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table"><em>System VGPR Work-Item ID Enumeration Values</em></a>
+defines the values.</p>
+<p class="last">Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT</span></tt>.</p>
+</td>
+</tr>
+<tr class="row-even"><td>13</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_ADDRESS_WATCH</td>
+<td><p class="first">Must be 0.</p>
+<p>Wavefront starts execution
+with address watch
+exceptions enabled which
+are generated when L1 has
+witnessed a thread access
+an <em>address of
+interest</em>.</p>
+<p class="last">CP is responsible for
+filling in the address
+watch bit in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.EXCP_EN_MSB</span></tt>
+according to what the
+runtime requests.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>14</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_MEMORY</td>
+<td><p class="first">Must be 0.</p>
+<p>Wavefront starts execution
+with memory violation
+exceptions
+enabled which are generated
+when a memory violation has
+occurred for this wavefront from
+L1 or LDS
+(write-to-read-only-memory,
+mis-aligned atomic, LDS
+address out of range,
+illegal address, etc.).</p>
+<p class="last">CP sets the memory
+violation bit in
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.EXCP_EN_MSB</span></tt>
+according to what the
+runtime requests.</p>
+</td>
+</tr>
+<tr class="row-even"><td>23:15</td>
+<td>9 bits</td>
+<td>GRANULATED_LDS_SIZE</td>
+<td><p class="first">Must be 0.</p>
+<p>CP uses the rounded value
+from the dispatch packet,
+not this value, as the
+dispatch may contain
+dynamically allocated group
+segment memory. CP writes
+directly to
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.LDS_SIZE</span></tt>.</p>
+<p>Amount of group segment
+(LDS) to allocate for each
+work-group. Granularity is
+device specific:</p>
+<dl class="last docutils">
+<dt>GFX6:</dt>
+<dd>roundup(lds-size / (64 * 4))</dd>
+<dt>GFX7-GFX9:</dt>
+<dd>roundup(lds-size / (128 * 4))</dd>
+</dl>
+</td>
+</tr>
+<tr class="row-odd"><td>24</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_IEEE_754_FP
+_INVALID_OPERATION</td>
+<td><p class="first">Wavefront starts execution
+with specified exceptions
+enabled.</p>
+<p>Used by CP to set up
+<tt class="docutils literal"><span class="pre">COMPUTE_PGM_RSRC2.EXCP_EN</span></tt>
+(set from bits 0..6).</p>
+<p class="last">IEEE 754 FP Invalid
+Operation</p>
+</td>
+</tr>
+<tr class="row-even"><td>25</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_FP_DENORMAL
+_SOURCE</td>
+<td>FP Denormal: one or more
+input operands is a
+denormal number</td>
+</tr>
+<tr class="row-odd"><td>26</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_IEEE_754_FP
+_DIVISION_BY_ZERO</td>
+<td>IEEE 754 FP Division by
+Zero</td>
+</tr>
+<tr class="row-even"><td>27</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_IEEE_754_FP
+_OVERFLOW</td>
+<td>IEEE 754 FP Overflow</td>
+</tr>
+<tr class="row-odd"><td>28</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_IEEE_754_FP
+_UNDERFLOW</td>
+<td>IEEE 754 FP Underflow</td>
+</tr>
+<tr class="row-even"><td>29</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_IEEE_754_FP
+_INEXACT</td>
+<td>IEEE 754 FP Inexact</td>
+</tr>
+<tr class="row-odd"><td>30</td>
+<td>1 bit</td>
+<td>ENABLE_EXCEPTION_INT_DIVIDE_BY
+_ZERO</td>
+<td>Integer Division by Zero
+(rcp_iflag_f32 instruction
+only)</td>
+</tr>
+<tr class="row-even"><td>31</td>
+<td>1 bit</td>
+<td> </td>
+<td>Reserved, must be 0.</td>
+</tr>
+<tr class="row-odd"><td>32</td>
+<td colspan="3"><strong>Total size 4 bytes.</strong></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table">
+<caption>Floating Point Rounding Mode Enumeration Values</caption>
+<colgroup>
+<col width="52%" />
+<col width="7%" />
+<col width="41%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Enumeration Name</th>
+<th class="head">Value</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>FLOAT_ROUND_MODE_NEAR_EVEN</td>
+<td>0</td>
+<td>Round Ties To Even</td>
+</tr>
+<tr class="row-odd"><td>FLOAT_ROUND_MODE_PLUS_INFINITY</td>
+<td>1</td>
+<td>Round Toward +infinity</td>
+</tr>
+<tr class="row-even"><td>FLOAT_ROUND_MODE_MINUS_INFINITY</td>
+<td>2</td>
+<td>Round Toward -infinity</td>
+</tr>
+<tr class="row-odd"><td>FLOAT_ROUND_MODE_ZERO</td>
+<td>3</td>
+<td>Round Toward 0</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table">
+<caption>Floating Point Denorm Mode Enumeration Values</caption>
+<colgroup>
+<col width="52%" />
+<col width="7%" />
+<col width="41%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Enumeration Name</th>
+<th class="head">Value</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>FLOAT_DENORM_MODE_FLUSH_SRC_DST</td>
+<td>0</td>
+<td>Flush Source and Destination
+Denorms</td>
+</tr>
+<tr class="row-odd"><td>FLOAT_DENORM_MODE_FLUSH_DST</td>
+<td>1</td>
+<td>Flush Output Denorms</td>
+</tr>
+<tr class="row-even"><td>FLOAT_DENORM_MODE_FLUSH_SRC</td>
+<td>2</td>
+<td>Flush Source Denorms</td>
+</tr>
+<tr class="row-odd"><td>FLOAT_DENORM_MODE_FLUSH_NONE</td>
+<td>3</td>
+<td>No Flush</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table">
+<caption>System VGPR Work-Item ID Enumeration Values</caption>
+<colgroup>
+<col width="55%" />
+<col width="7%" />
+<col width="38%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Enumeration Name</th>
+<th class="head">Value</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>SYSTEM_VGPR_WORKITEM_ID_X</td>
+<td>0</td>
+<td>Set work-item X dimension
+ID.</td>
+</tr>
+<tr class="row-odd"><td>SYSTEM_VGPR_WORKITEM_ID_X_Y</td>
+<td>1</td>
+<td>Set work-item X and Y
+dimensions ID.</td>
+</tr>
+<tr class="row-even"><td>SYSTEM_VGPR_WORKITEM_ID_X_Y_Z</td>
+<td>2</td>
+<td>Set work-item X, Y and Z
+dimensions ID.</td>
+</tr>
+<tr class="row-odd"><td>SYSTEM_VGPR_WORKITEM_ID_UNDEFINED</td>
+<td>3</td>
+<td>Undefined.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="initial-kernel-execution-state">
+<span id="amdgpu-amdhsa-initial-kernel-execution-state"></span><h4><a class="toc-backref" href="#id72">Initial Kernel Execution State</a><a class="headerlink" href="#initial-kernel-execution-state" title="Permalink to this headline">¶</a></h4>
+<p>This section defines the register state that will be set up by the packet
+processor prior to the start of execution of every wavefront. This is limited by
+the constraints of the hardware controllers of CP/ADC/SPI.</p>
+<p>The order of the SGPR registers is defined, but the compiler can specify which
+ones are actually set up in the kernel descriptor using the <tt class="docutils literal"><span class="pre">enable_sgpr_*</span></tt> bit
+fields (see <a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>). The register numbers used
+for enabled registers are dense starting at SGPR0: the first enabled register is
+SGPR0, the next enabled register is SGPR1 etc.; disabled registers do not have
+an SGPR number.</p>
+<p>The initial SGPRs comprise up to 16 User SGPRs that are set by CP and apply to
+all wavefronts of the grid. It is possible to specify more than 16 User SGPRs using
+the <tt class="docutils literal"><span class="pre">enable_sgpr_*</span></tt> bit fields, in which case only the first 16 are actually
+initialized. These are then immediately followed by the System SGPRs that are
+set up by ADC/SPI and can have different values for each wavefront of the grid
+dispatch.</p>
+<p>SGPR register initial state is defined in
+<a class="reference internal" href="#amdgpu-amdhsa-sgpr-register-set-up-order-table"><em>SGPR Register Set Up Order</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-sgpr-register-set-up-order-table">
+<caption>SGPR Register Set Up Order</caption>
+<colgroup>
+<col width="13%" />
+<col width="33%" />
+<col width="8%" />
+<col width="46%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">SGPR Order</th>
+<th class="head">Name
+(kernel descriptor enable
+field)</th>
+<th class="head">Number
+of
+SGPRs</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>First</td>
+<td>Private Segment Buffer
+(enable_sgpr_private
+_segment_buffer)</td>
+<td>4</td>
+<td><p class="first">V# that can be used, together
+with Scratch Wavefront Offset
+as an offset, to access the
+private memory space using a
+segment address.</p>
+<p class="last">CP uses the value provided by
+the runtime.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Dispatch Ptr
+(enable_sgpr_dispatch_ptr)</td>
+<td>2</td>
+<td>64 bit address of AQL dispatch
+packet for kernel dispatch
+actually executing.</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Queue Ptr
+(enable_sgpr_queue_ptr)</td>
+<td>2</td>
+<td>64 bit address of amd_queue_t
+object for AQL queue on which
+the dispatch packet was
+queued.</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Kernarg Segment Ptr
+(enable_sgpr_kernarg
+_segment_ptr)</td>
+<td>2</td>
+<td><p class="first">64 bit address of Kernarg
+segment. This is directly
+copied from the
+kernarg_address in the kernel
+dispatch packet.</p>
+<p class="last">Having CP load it once avoids
+loading it at the beginning of
+every wavefront.</p>
+</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Dispatch Id
+(enable_sgpr_dispatch_id)</td>
+<td>2</td>
+<td>64 bit Dispatch ID of the
+dispatch packet being
+executed.</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Flat Scratch Init
+(enable_sgpr_flat_scratch
+_init)</td>
+<td>2</td>
+<td><p class="first">This is 2 SGPRs:</p>
+<dl class="last docutils">
+<dt>GFX6</dt>
+<dd>Not supported.</dd>
+<dt>GFX7-GFX8</dt>
+<dd><p class="first">The first SGPR is a 32 bit
+byte offset from
+<tt class="docutils literal"><span class="pre">SH_HIDDEN_PRIVATE_BASE_VIMID</span></tt>
+to per SPI base of memory
+for scratch for the queue
+executing the kernel
+dispatch. CP obtains this
+from the runtime. (The
+Scratch Segment Buffer base
+address is
+<tt class="docutils literal"><span class="pre">SH_HIDDEN_PRIVATE_BASE_VIMID</span></tt>
+plus this offset.) The value
+of Scratch Wavefront Offset must
+be added to this offset by
+the kernel machine code,
+right shifted by 8, and
+moved to the FLAT_SCRATCH_HI
+SGPR register.
+FLAT_SCRATCH_HI corresponds
+to SGPRn-4 on GFX7, and
+SGPRn-6 on GFX8 (where SGPRn
+is the highest numbered SGPR
+allocated to the wavefront).
+FLAT_SCRATCH_HI is
+multiplied by 256 (as it is
+in units of 256 bytes) and
+added to
+<tt class="docutils literal"><span class="pre">SH_HIDDEN_PRIVATE_BASE_VIMID</span></tt>
+to calculate the per wavefront
+FLAT SCRATCH BASE in flat
+memory instructions that
+access the scratch
+aperture.</p>
+<p class="last">The second SGPR is 32 bit
+byte size of a single
+work-item’s scratch memory
+usage. CP obtains this from
+the runtime, and it is
+always a multiple of DWORD.
+CP checks that the value in
+the kernel dispatch packet
+Private Segment Byte Size is
+not larger, and requests the
+runtime to increase the
+queue’s scratch size if
+necessary. The kernel code
+must move it to
+FLAT_SCRATCH_LO which is
+SGPRn-3 on GFX7 and SGPRn-5
+on GFX8. FLAT_SCRATCH_LO is
+used as the FLAT SCRATCH
+SIZE in flat memory
+instructions. Having CP load
+it once avoids loading it at
+the beginning of every
+wavefront.</p>
+</dd>
+<dt>GFX9</dt>
+<dd>This is the
+64 bit base address of the
+per SPI scratch backing
+memory managed by SPI for
+the queue executing the
+kernel dispatch. CP obtains
+this from the runtime (and
+divides it if there are
+multiple Shader Arrays each
+with its own SPI). The value
+of Scratch Wavefront Offset must
+be added by the kernel
+machine code and the result
+moved to the FLAT_SCRATCH
+SGPR which is SGPRn-6 and
+SGPRn-5. It is used as the
+FLAT SCRATCH BASE in flat
+memory instructions.</dd>
+</dl>
+</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Private Segment Size
+(enable_sgpr_private
+_segment_size)</td>
+<td>1</td>
+<td><p class="first">The 32 bit byte size of a
+single work-item’s scratch
+memory allocation. This is the
+value from the kernel
+dispatch packet Private
+Segment Byte Size rounded up
+by CP to a multiple of
+DWORD.</p>
+<p>Having CP load it once avoids
+loading it at the beginning of
+every wavefront.</p>
+<p class="last">This is not used for
+GFX7-GFX8 since it is the same
+value as the second SGPR of
+Flat Scratch Init. However, it
+may be needed for GFX9 which
+changes the meaning of the
+Flat Scratch Init value.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Grid Work-Group Count X
+(enable_sgpr_grid
+_workgroup_count_X)</td>
+<td>1</td>
+<td>32 bit count of the number of
+work-groups in the X dimension
+for the grid being
+executed. Computed from the
+fields in the kernel dispatch
+packet as ((grid_size.x +
+workgroup_size.x - 1) /
+workgroup_size.x).</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Grid Work-Group Count Y
+(enable_sgpr_grid
+_workgroup_count_Y &amp;&amp;
+less than 16 previous
+SGPRs)</td>
+<td>1</td>
+<td><p class="first">32 bit count of the number of
+work-groups in the Y dimension
+for the grid being
+executed. Computed from the
+fields in the kernel dispatch
+packet as ((grid_size.y +
+workgroup_size.y - 1) /
+workgroup_size.y).</p>
+<p class="last">Only initialized if &lt;16
+previous SGPRs initialized.</p>
+</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Grid Work-Group Count Z
+(enable_sgpr_grid
+_workgroup_count_Z &amp;&amp;
+less than 16 previous
+SGPRs)</td>
+<td>1</td>
+<td><p class="first">32 bit count of the number of
+work-groups in the Z dimension
+for the grid being
+executed. Computed from the
+fields in the kernel dispatch
+packet as ((grid_size.z +
+workgroup_size.z - 1) /
+workgroup_size.z).</p>
+<p class="last">Only initialized if &lt;16
+previous SGPRs initialized.</p>
+</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Work-Group Id X
+(enable_sgpr_workgroup_id
+_X)</td>
+<td>1</td>
+<td>32 bit work-group id in X
+dimension of grid for
+wavefront.</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Work-Group Id Y
+(enable_sgpr_workgroup_id
+_Y)</td>
+<td>1</td>
+<td>32 bit work-group id in Y
+dimension of grid for
+wavefront.</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Work-Group Id Z
+(enable_sgpr_workgroup_id
+_Z)</td>
+<td>1</td>
+<td>32 bit work-group id in Z
+dimension of grid for
+wavefront.</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Work-Group Info
+(enable_sgpr_workgroup
+_info)</td>
+<td>1</td>
+<td>{first_wavefront, 14’b0000,
+ordered_append_term[10:0],
+threadgroup_size_in_wavefronts[5:0]}</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Scratch Wavefront Offset
+(enable_sgpr_private
+_segment_wavefront_offset)</td>
+<td>1</td>
+<td>32 bit byte offset from base
+of scratch base of queue
+executing the kernel
+dispatch. Must be used as an
+offset with Private
+segment address when using
+Scratch Segment Buffer. It
+must be used to set up FLAT
+SCRATCH for flat addressing
+(see
+<a class="reference internal" href="#amdgpu-amdhsa-flat-scratch"><em>Flat Scratch</em></a>).</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>The order of the VGPR registers is defined, but the compiler can specify which
+ones are actually set up in the kernel descriptor using the <tt class="docutils literal"><span class="pre">enable_vgpr*</span></tt> bit
+fields (see <a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>). The register numbers used
+for enabled registers are dense starting at VGPR0: the first enabled register is
+VGPR0, the next enabled register is VGPR1 etc.; disabled registers do not have a
+VGPR number.</p>
+<p>VGPR register initial state is defined in
+<a class="reference internal" href="#amdgpu-amdhsa-vgpr-register-set-up-order-table"><em>VGPR Register Set Up Order</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-vgpr-register-set-up-order-table">
+<caption>VGPR Register Set Up Order</caption>
+<colgroup>
+<col width="14%" />
+<col width="36%" />
+<col width="8%" />
+<col width="42%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">VGPR Order</th>
+<th class="head">Name
+(kernel descriptor enable
+field)</th>
+<th class="head">Number
+of
+VGPRs</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>First</td>
+<td>Work-Item Id X
+(Always initialized)</td>
+<td>1</td>
+<td>32 bit work item id in X
+dimension of work-group for
+wavefront lane.</td>
+</tr>
+<tr class="row-odd"><td>then</td>
+<td>Work-Item Id Y
+(enable_vgpr_workitem_id
+> 0)</td>
+<td>1</td>
+<td>32 bit work item id in Y
+dimension of work-group for
+wavefront lane.</td>
+</tr>
+<tr class="row-even"><td>then</td>
+<td>Work-Item Id Z
+(enable_vgpr_workitem_id
+> 1)</td>
+<td>1</td>
+<td>32 bit work item id in Z
+dimension of work-group for
+wavefront lane.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>The setting of registers is done by GPU CP/ADC/SPI hardware as follows:</p>
+<ol class="arabic simple">
+<li>SGPRs before the Work-Group Ids are set by CP using the 16 User Data
+registers.</li>
+<li>Work-group Id registers X, Y, Z are set by ADC which supports any
+combination including none.</li>
+<li>Scratch Wavefront Offset is set by SPI on a per wavefront basis which is why
+its value cannot be included with the flat scratch init value which is per queue.</li>
+<li>The VGPRs are set by SPI which only supports specifying either (X), (X, Y)
+or (X, Y, Z).</li>
+</ol>
+<p>Flat Scratch register pair are adjacent SGPRs so they can be moved as a 64 bit
+value to the hardware required SGPRn-3 and SGPRn-4 respectively.</p>
+<p>The global segment can be accessed either using buffer instructions (GFX6 which
+has V# 64 bit address support), flat instructions (GFX7-GFX9), or global
+instructions (GFX9).</p>
+<p>If buffer operations are used then the compiler can generate a V# with the
+following properties:</p>
+<ul class="simple">
+<li>base address of 0</li>
+<li>no swizzle</li>
+<li>ATC: 1 if IOMMU present (such as APU)</li>
+<li>ptr64: 1</li>
+<li>MTYPE set to support memory coherence that matches the runtime (such as CC for
+APU and NC for dGPU).</li>
+</ul>
+</div>
+<div class="section" id="kernel-prolog">
+<span id="amdgpu-amdhsa-kernel-prolog"></span><h4><a class="toc-backref" href="#id73">Kernel Prolog</a><a class="headerlink" href="#kernel-prolog" title="Permalink to this headline">¶</a></h4>
+<div class="section" id="m0">
+<span id="amdgpu-amdhsa-m0"></span><h5><a class="toc-backref" href="#id74">M0</a><a class="headerlink" href="#m0" title="Permalink to this headline">¶</a></h5>
+<dl class="docutils">
+<dt>GFX6-GFX8</dt>
+<dd>The M0 register must be initialized with a value at least the total LDS size
+if the kernel may access LDS via DS or flat operations. Total LDS size is
+available in dispatch packet. For M0, it is also possible to use maximum
+possible value of LDS for given target (0x7FFF for GFX6 and 0xFFFF for
+GFX7-GFX8).</dd>
+<dt>GFX9</dt>
+<dd>The M0 register is not used for range checking LDS accesses and so does not
+need to be initialized in the prolog.</dd>
+</dl>
+</div>
+<div class="section" id="flat-scratch">
+<span id="amdgpu-amdhsa-flat-scratch"></span><h5><a class="toc-backref" href="#id75">Flat Scratch</a><a class="headerlink" href="#flat-scratch" title="Permalink to this headline">¶</a></h5>
+<p>If the kernel may use flat operations to access scratch memory, the prolog code
+must set up FLAT_SCRATCH register pair (FLAT_SCRATCH_LO/FLAT_SCRATCH_HI which
+are in SGPRn-4/SGPRn-3). Initialization uses Flat Scratch Init and Scratch Wavefront
+Offset SGPR registers (see <a class="reference internal" href="#amdgpu-amdhsa-initial-kernel-execution-state"><em>Initial Kernel Execution State</em></a>):</p>
+<dl class="docutils">
+<dt>GFX6</dt>
+<dd>Flat scratch is not supported.</dd>
+<dt>GFX7-GFX8</dt>
+<dd><ol class="first last arabic simple">
+<li>The low word of Flat Scratch Init is 32 bit byte offset from
+<tt class="docutils literal"><span class="pre">SH_HIDDEN_PRIVATE_BASE_VIMID</span></tt> to the base of scratch backing memory
+being managed by SPI for the queue executing the kernel dispatch. This is
+the same value used in the Scratch Segment Buffer V# base address. The
+prolog must add the value of Scratch Wavefront Offset to get the wavefront’s byte
+scratch backing memory offset from <tt class="docutils literal"><span class="pre">SH_HIDDEN_PRIVATE_BASE_VIMID</span></tt>. Since
+FLAT_SCRATCH_HI is in units of 256 bytes, the offset must be right shifted
+by 8 before moving into FLAT_SCRATCH_HI.</li>
+<li>The second word of Flat Scratch Init is 32 bit byte size of a single
+work-item’s scratch memory usage. This is directly loaded from the kernel
+dispatch packet Private Segment Byte Size and rounded up to a multiple of
+DWORD. Having CP load it once avoids loading it at the beginning of every
+wavefront. The prolog must move it to FLAT_SCRATCH_LO for use as FLAT SCRATCH
+SIZE.</li>
+</ol>
+</dd>
+<dt>GFX9</dt>
+<dd>The Flat Scratch Init is the 64 bit address of the base of scratch backing
+memory being managed by SPI for the queue executing the kernel dispatch. The
+prolog must add the value of Scratch Wavefront Offset and move the result to the FLAT_SCRATCH
+pair for use as the flat scratch base in flat memory instructions.</dd>
+</dl>
+</div>
+</div>
+<div class="section" id="memory-model">
+<span id="amdgpu-amdhsa-memory-model"></span><h4><a class="toc-backref" href="#id76">Memory Model</a><a class="headerlink" href="#memory-model" title="Permalink to this headline">¶</a></h4>
+<p>This section describes the mapping of LLVM memory model onto AMDGPU machine code
+(see <a class="reference internal" href="LangRef.html#memmodel"><em>Memory Model for Concurrent Operations</em></a>). <em>The implementation is WIP.</em></p>
+<p>The AMDGPU backend supports the memory synchronization scopes specified in
+<a class="reference internal" href="#amdgpu-memory-scopes"><em>Memory Scopes</em></a>.</p>
+<p>The code sequences used to implement the memory model are defined in table
+<a class="reference internal" href="#amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table"><em>AMDHSA Memory Model Code Sequences GFX6-GFX9</em></a>.</p>
+<p>The sequences specify the order of instructions that a single thread must
+execute. The <tt class="docutils literal"><span class="pre">s_waitcnt</span></tt> and <tt class="docutils literal"><span class="pre">buffer_wbinvl1_vol</span></tt> are defined with respect
+to other memory instructions executed by the same thread. This allows them to be
+moved earlier or later which can allow them to be combined with other instances
+of the same instruction, or hoisted/sunk out of loops to improve
+performance. Only the instructions related to the memory model are given;
+additional <tt class="docutils literal"><span class="pre">s_waitcnt</span></tt> instructions are required to ensure registers are
+defined before being used. These may be able to be combined with the memory
+model <tt class="docutils literal"><span class="pre">s_waitcnt</span></tt> instructions as described above.</p>
+<p>The AMDGPU backend supports the following memory models:</p>
+<blockquote>
+<div><dl class="docutils">
+<dt>HSA Memory Model <a class="reference internal" href="#hsa">[HSA]</a></dt>
+<dd>The HSA memory model uses a single happens-before relation for all address
+spaces (see <a class="reference internal" href="#amdgpu-address-spaces"><em>Address Spaces</em></a>).</dd>
+<dt>OpenCL Memory Model <a class="reference internal" href="#id41">[OpenCL]</a></dt>
+<dd>The OpenCL memory model has separate happens-before relations for the
+global and local address spaces. Only a fence specifying both global and
+local address space, and seq_cst instructions join the relationships. Since
+the LLVM <tt class="docutils literal"><span class="pre">memfence</span></tt> instruction does not allow an address space to be
+specified the OpenCL fence has to conservatively assume both local and
+global address space was specified. However, optimizations can often be
+done to eliminate the additional <tt class="docutils literal"><span class="pre">s_waitcnt</span></tt> instructions when there are
+no intervening memory instructions which access the corresponding address
+space. The code sequences in the table indicate what can be omitted for the
+OpenCL memory model. The target triple environment is used to determine if the
+source language is OpenCL (see <a class="reference internal" href="#amdgpu-opencl"><em>OpenCL</em></a>).</dd>
+</dl>
+</div></blockquote>
+<p><tt class="docutils literal"><span class="pre">ds/flat_load/store/atomic</span></tt> instructions to local memory are termed LDS
+operations.</p>
+<p><tt class="docutils literal"><span class="pre">buffer/global/flat_load/store/atomic</span></tt> instructions to global memory are
+termed vector memory operations.</p>
+<p>For GFX6-GFX9:</p>
+<ul class="simple">
+<li>Each agent has multiple compute units (CU).</li>
+<li>Each CU has multiple SIMDs that execute wavefronts.</li>
+<li>The wavefronts for a single work-group are executed in the same CU but may be
+executed by different SIMDs.</li>
+<li>Each CU has a single LDS memory shared by the wavefronts of the work-groups
+executing on it.</li>
+<li>All LDS operations of a CU are performed as wavefront wide operations in a
+global order and involve no caching. Completion is reported to a wavefront in
+execution order.</li>
+<li>The LDS memory has multiple request queues shared by the SIMDs of a
+CU. Therefore, the LDS operations performed by different wavefronts of a work-group
+can be reordered relative to each other, which can result in reordering the
+visibility of vector memory operations with respect to LDS operations of other
+wavefronts in the same work-group. A <tt class="docutils literal"><span class="pre">s_waitcnt</span> <span class="pre">lgkmcnt(0)</span></tt> is required to
+ensure synchronization between LDS operations and vector memory operations
+between wavefronts of a work-group, but not between operations performed by the
+same wavefront.</li>
+<li>The vector memory operations are performed as wavefront wide operations and
+completion is reported to a wavefront in execution order. The exception is
+that for GFX7-GFX9 <tt class="docutils literal"><span class="pre">flat_load/store/atomic</span></tt> instructions can report out of
+vector memory order if they access LDS memory, and out of LDS operation order
+if they access global memory.</li>
+<li>The vector memory operations access a single vector L1 cache shared by all
+SIMDs of a CU. Therefore, no special action is required for coherence between the
+lanes of a single wavefront, or for coherence between wavefronts in the same
+work-group. A <tt class="docutils literal"><span class="pre">buffer_wbinvl1_vol</span></tt> is required for coherence between wavefronts
+executing in different work-groups as they may be executing on different CUs.</li>
+<li>The scalar memory operations access a scalar L1 cache shared by all wavefronts
+on a group of CUs. The scalar and vector L1 caches are not coherent. However,
+scalar operations are used in a restricted way so do not impact the memory
+model. See <a class="reference internal" href="#amdgpu-amdhsa-memory-spaces"><em>Memory Spaces</em></a>.</li>
+<li>The vector and scalar memory operations use an L2 cache shared by all CUs on
+the same agent.</li>
+<li>The L2 cache has independent channels to service disjoint ranges of virtual
+addresses.</li>
+<li>Each CU has a separate request queue per channel. Therefore, the vector and
+scalar memory operations performed by wavefronts executing in different work-groups
+(which may be executing on different CUs) of an agent can be reordered
+relative to each other. A <tt class="docutils literal"><span class="pre">s_waitcnt</span> <span class="pre">vmcnt(0)</span></tt> is required to ensure
+synchronization between vector memory operations of different CUs. It ensures a
+previous vector memory operation has completed before executing a subsequent
+vector memory or LDS operation and so can be used to meet the requirements of
+acquire and release.</li>
+<li>The L2 cache can be kept coherent with other agents on some targets, or ranges
+of virtual addresses can be set up to bypass it to ensure system coherence.</li>
+</ul>
+<p>Private address space uses <tt class="docutils literal"><span class="pre">buffer_load/store</span></tt> using the scratch V# (GFX6-GFX8),
+or <tt class="docutils literal"><span class="pre">scratch_load/store</span></tt> (GFX9). Since only a single thread is accessing the
+memory, atomic memory orderings are not meaningful and all accesses are treated
+as non-atomic.</p>
+<p>Constant address space uses <tt class="docutils literal"><span class="pre">buffer/global_load</span></tt> instructions (or equivalent
+scalar memory instructions). Since the constant address space contents do not
+change during the execution of a kernel dispatch it is not legal to perform
+stores, and atomic memory orderings are not meaningful and all accesses are
+treated as non-atomic.</p>
+<p>A memory synchronization scope wider than work-group is not meaningful for the
+group (LDS) address space and is treated as work-group.</p>
+<p>The memory model does not support the region address space which is treated as
+non-atomic.</p>
+<p>Acquire memory ordering is not meaningful on store atomic instructions and is
+treated as non-atomic.</p>
+<p>Release memory ordering is not meaningful on load atomic instructions and is
+treated as non-atomic.</p>
+<p>Acquire-release memory ordering is not meaningful on load or store atomic
+instructions and is treated as acquire and release respectively.</p>
+<p>AMDGPU backend only uses scalar memory operations to access memory that is
+proven to not change during the execution of the kernel dispatch. This includes
+constant address space and global address space for program scope const
+variables. Therefore the kernel machine code does not have to maintain the
+scalar L1 cache to ensure it is coherent with the vector L1 cache. The scalar
+and vector L1 caches are invalidated between kernel dispatches by CP since
+constant address space data may change between kernel dispatch executions. See
+<a class="reference internal" href="#amdgpu-amdhsa-memory-spaces"><em>Memory Spaces</em></a>.</p>
+<p>The one exception is if scalar writes are used to spill SGPR registers. In this
+case the AMDGPU backend ensures the memory location used to spill is never
+accessed by vector memory operations at the same time. If scalar writes are used
+then a <tt class="docutils literal"><span class="pre">s_dcache_wb</span></tt> is inserted before the <tt class="docutils literal"><span class="pre">s_endpgm</span></tt> and before a function
+return since the locations may be used for vector memory instructions by a
+future wavefront that uses the same scratch area, or a function call that creates a
+frame at the same address, respectively. There is no need for a <tt class="docutils literal"><span class="pre">s_dcache_inv</span></tt>
+as all scalar writes are write-before-read in the same thread.</p>
+<p>Scratch backing memory (which is used for the private address space)
+is accessed with MTYPE NC_NV (non-coherent non-volatile). Since the private
+address space is only accessed by a single thread, and is always
+write-before-read, there is never a need to invalidate these entries from the L1
+cache. Hence all cache invalidates are done as <tt class="docutils literal"><span class="pre">*_vol</span></tt> to only invalidate the
+volatile cache lines.</p>
+<p>On dGPU the kernarg backing memory is accessed as UC (uncached) to avoid needing
+to invalidate the L2 cache. This also causes it to be treated as
+non-volatile and so is not invalidated by <tt class="docutils literal"><span class="pre">*_vol</span></tt>. On APU it is accessed as CC
+(cache coherent) and so the L2 cache will be coherent with the CPU and other
+agents.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-memory-model-code-sequences-gfx6-gfx9-table">
+<caption>AMDHSA Memory Model Code Sequences GFX6-GFX9</caption>
+<colgroup>
+<col width="15%" />
+<col width="15%" />
+<col width="18%" />
+<col width="13%" />
+<col width="39%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">LLVM Instr</th>
+<th class="head">LLVM Memory
+Ordering</th>
+<th class="head">LLVM Memory
+Sync Scope</th>
+<th class="head">AMDGPU
+Address
+Space</th>
+<th class="head">AMDGPU Machine Code</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td colspan="5"><strong>Non-Atomic</strong></td>
+</tr>
+<tr class="row-odd"><td>load</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+<li>private</li>
+<li>constant</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>!volatile &amp; !nontemporal<ol class="arabic">
+<li>buffer/global/flat_load</li>
+</ol>
+</li>
+<li>volatile &amp; !nontemporal<ol class="arabic">
+<li>buffer/global/flat_load
+glc=1</li>
+</ol>
+</li>
+<li>nontemporal<ol class="arabic">
+<li>buffer/global/flat_load
+glc=1 slc=1</li>
+</ol>
+</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>load</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_load</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>store</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+<li>private</li>
+<li>constant</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>!nontemporal<ol class="arabic">
+<li>buffer/global/flat_store</li>
+</ol>
+</li>
+<li>nontemporal<ol class="arabic">
+<li>buffer/global/flat_store
+glc=1 slc=1</li>
+</ol>
+</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>store</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Unordered Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>unordered</td>
+<td><em>any</em></td>
+<td><em>any</em></td>
+<td><em>Same as non-atomic</em>.</td>
+</tr>
+<tr class="row-odd"><td>store atomic</td>
+<td>unordered</td>
+<td><em>any</em></td>
+<td><em>any</em></td>
+<td><em>Same as non-atomic</em>.</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>unordered</td>
+<td><em>any</em></td>
+<td><em>any</em></td>
+<td><em>Same as monotonic
+atomic</em>.</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Monotonic Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_load</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_load</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_load
+glc=1</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>store atomic</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>store atomic</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>monotonic</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Acquire Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/ds/flat_load</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_load</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>ds_load</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the load
+atomic value being
+acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>flat_load</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the load
+atomic value being
+acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>buffer/global/flat_load
+glc=1</li>
+<li>s_waitcnt vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the load
+has completed
+before invalidating
+the cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="3">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following
+loads will not see
+stale global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>flat_load glc=1</li>
+<li>s_waitcnt vmcnt(0) &amp;
+lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the flat_load
+has completed
+before invalidating
+the cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="3">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/ds/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>ds_atomic</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the
+atomicrmw value
+being acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>flat_atomic</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the
+atomicrmw value
+being acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>buffer/global/flat_atomic</li>
+<li>s_waitcnt vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the
+atomicrmw has
+completed before
+invalidating the
+cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="3">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>flat_atomic</li>
+<li>s_waitcnt vmcnt(0) &amp;
+lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the
+atomicrmw has
+completed before
+invalidating the
+cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="3">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td>fence</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit.</li>
+<li>However, since LLVM
+currently has no
+address space on
+the fence need to
+conservatively
+always generate. If
+fence had an
+address space then
+set to address
+space of OpenCL
+fence flag, or to
+generic if both
+local and global
+flags are
+specified.</li>
+<li>Must happen after
+any preceding
+local/generic load
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+fence-paired-atomic).</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the
+value read by the
+fence-paired-atomic.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>acquire</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit
+lgkmcnt(0).</li>
+<li>However, since LLVM
+currently has no
+address space on
+the fence need to
+conservatively
+always generate
+(see comment for
+previous fence).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic load
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+fence-paired-atomic).</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic load
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+fence-paired-atomic).</li>
+<li>Must happen before
+the following
+buffer_wbinvl1_vol.</li>
+<li>Ensures that the
+fence-paired-atomic
+has completed
+before invalidating
+the
+cache. Therefore
+any following
+locations read must
+be no older than
+the value read by
+the
+fence-paired-atomic.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="2">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before any
+following global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Release Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>store atomic</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/ds/flat_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>store atomic</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+store.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+store that is being
+released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>buffer/global/flat_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>store atomic</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>store atomic</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+store.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+store that is being
+released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>flat_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>store atomic</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+store.</li>
+<li>Ensures that all
+memory operations
+to memory have
+completed before
+performing the
+store that is being
+released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>buffer/global/ds/flat_store</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/ds/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>buffer/global/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>ds_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to global and local
+have completed
+before performing
+the atomicrmw that
+is being released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>buffer/global/ds/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td>fence</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit.</li>
+<li>However, since LLVM
+currently has no
+address space on
+the fence need to
+conservatively
+always generate. If
+fence had an
+address space then
+set to address
+space of OpenCL
+fence flag, or to
+generic if both
+local and global
+flags are
+specified.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+any following store
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+fence-paired-atomic).</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+following
+fence-paired-atomic.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>release</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit
+lgkmcnt(0).</li>
+<li>If OpenCL and
+address space is
+local, omit
+vmcnt(0).</li>
+<li>However, since LLVM
+currently has no
+address space on
+the fence need to
+conservatively
+always generate. If
+fence had an
+address space then
+set to address
+space of OpenCL
+fence flag, or to
+generic if both
+local and global
+flags are
+specified.</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+any following store
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+fence-paired-atomic).</li>
+<li>Ensures that all
+memory operations
+have
+completed before
+performing the
+following
+fence-paired-atomic.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Acquire-Release Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first last arabic simple">
+<li>buffer/global/ds/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li>buffer/global/flat_atomic</li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>ds_atomic</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the
+atomicrmw value
+being acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="2">
+<li>flat_atomic</li>
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL, omit.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures any
+following global
+data read is no
+older than the
+atomicrmw value
+being acquired.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to global have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="2">
+<li>buffer/global/flat_atomic</li>
+<li>s_waitcnt vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the
+atomicrmw has
+completed before
+invalidating the
+cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="4">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to global have
+completed before
+performing the
+atomicrmw that is
+being released.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="2">
+<li>flat_atomic</li>
+<li>s_waitcnt vmcnt(0) &amp;
+lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL, omit
+lgkmcnt(0).</li>
+<li>Must happen before
+following
+buffer_wbinvl1_vol.</li>
+<li>Ensures the
+atomicrmw has
+completed before
+invalidating the
+cache.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="4">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td>fence</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit.</li>
+<li>However,
+since LLVM
+currently has no
+address space on
+the fence, the
+s_waitcnt must
+conservatively
+always be generated
+(see comment for
+previous fence).</li>
+<li>Must happen after
+any preceding
+local/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures that all
+memory operations
+to local have
+completed before
+performing any
+following global
+memory operations.</li>
+<li>Ensures that the
+preceding
+local/generic load
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+acquire-fence-paired-atomic
+) has completed
+before following
+global memory
+operations. This
+satisfies the
+requirements of
+acquire.</li>
+<li>Ensures that all
+previous memory
+operations have
+completed before a
+following
+local/generic store
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+release-fence-paired-atomic
+). This satisfies the
+requirements of
+release.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>acq_rel</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>If OpenCL and
+address space is
+not generic, omit
+lgkmcnt(0).</li>
+<li>However, since LLVM
+currently has no
+address space on
+the fence, lgkmcnt(0)
+must conservatively
+always be generated
+(see comment for
+previous fence).</li>
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0) and
+s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+any preceding
+global/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+any preceding
+local/generic
+load/store/load
+atomic/store
+atomic/atomicrmw.</li>
+<li>Must happen before
+the following
+buffer_wbinvl1_vol.</li>
+<li>Ensures that the
+preceding
+global/local/generic
+load
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+acquire-fence-paired-atomic
+) has completed
+before invalidating
+the cache. This
+satisfies the
+requirements of
+acquire.</li>
+<li>Ensures that all
+previous memory
+operations have
+completed before a
+following
+global/local/generic
+store
+atomic/atomicrmw
+with an equal or
+wider sync scope
+and memory ordering
+stronger than
+unordered (this is
+termed the
+release-fence-paired-atomic
+). This satisfies the
+requirements of
+release.</li>
+</ul>
+</div></blockquote>
+<ol class="arabic simple" start="2">
+<li>buffer_wbinvl1_vol</li>
+</ol>
+<blockquote class="last">
+<div><ul class="simple">
+<li>Must happen before
+any following
+global/generic
+load/load
+atomic/store/store
+atomic/atomicrmw.</li>
+<li>Ensures that
+following loads
+will not see stale
+global data. This
+satisfies the
+requirements of
+acquire.</li>
+</ul>
+</div></blockquote>
+</td>
+</tr>
+<tr class="row-odd"><td colspan="5"><strong>Sequentially Consistent Atomic</strong></td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+load atomic acquire,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>Must
+happen after
+preceding
+global/generic load
+atomic/store
+atomic/atomicrmw
+with memory
+ordering of seq_cst
+and with equal or
+wider sync scope.
+(Note that seq_cst
+fences have their
+own s_waitcnt
+lgkmcnt(0) and so do
+not need to be
+considered.)</li>
+<li>Ensures any
+preceding
+sequentially
+consistent local
+memory instructions
+have completed
+before executing
+this sequentially
+consistent
+instruction. This
+prevents reordering
+a seq_cst store
+followed by a
+seq_cst load. (Note
+that seq_cst is
+stronger than
+acquire/release as
+the reordering of
+load acquire
+followed by a store
+release is
+prevented by the
+waitcnt of
+the release, but
+there is nothing
+preventing a store
+release followed by
+load acquire from
+completing out of
+order.)</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li><em>Following
+instructions same as
+corresponding load
+atomic acquire,
+except must generate
+all instructions even
+for OpenCL.</em></li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>load atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>local</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+load atomic acquire,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-odd"><td>load atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><ol class="first arabic simple">
+<li>s_waitcnt lgkmcnt(0) &amp;
+vmcnt(0)</li>
+</ol>
+<blockquote>
+<div><ul class="simple">
+<li>Could be split into
+separate s_waitcnt
+vmcnt(0)
+and s_waitcnt
+lgkmcnt(0) to allow
+them to be
+independently moved
+according to the
+following rules.</li>
+<li>s_waitcnt lgkmcnt(0)
+must happen after
+preceding
+global/generic load
+atomic/store
+atomic/atomicrmw
+with memory
+ordering of seq_cst
+and with equal or
+wider sync scope.
+(Note that seq_cst
+fences have their
+own s_waitcnt
+lgkmcnt(0) and so do
+not need to be
+considered.)</li>
+<li>s_waitcnt vmcnt(0)
+must happen after
+preceding
+global/generic load
+atomic/store
+atomic/atomicrmw
+with memory
+ordering of seq_cst
+and with equal or
+wider sync scope.
+(Note that seq_cst
+fences have their
+own s_waitcnt
+vmcnt(0) and so do
+not need to be
+considered.)</li>
+<li>Ensures any
+preceding
+sequentially
+consistent global
+memory instructions
+have completed
+before executing
+this sequentially
+consistent
+instruction. This
+prevents reordering
+a seq_cst store
+followed by a
+seq_cst load. (Note
+that seq_cst is
+stronger than
+acquire/release as
+the reordering of
+load acquire
+followed by a store
+release is
+prevented by the
+waitcnt of
+the release, but
+there is nothing
+preventing a store
+release followed by
+load acquire from
+completing out of
+order.)</li>
+</ul>
+</div></blockquote>
+<ol class="last arabic simple" start="2">
+<li><em>Following
+instructions same as
+corresponding load
+atomic acquire,
+except must generate
+all instructions even
+for OpenCL.</em></li>
+</ol>
+</td>
+</tr>
+<tr class="row-even"><td>store atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+store atomic release,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-odd"><td>store atomic</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+store atomic release,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-even"><td>atomicrmw</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>local</li>
+<li>generic</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+atomicrmw acq_rel,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-odd"><td>atomicrmw</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><ul class="first last simple">
+<li>global</li>
+<li>generic</li>
+</ul>
+</td>
+<td><em>Same as corresponding
+atomicrmw acq_rel,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+<tr class="row-even"><td>fence</td>
+<td>seq_cst</td>
+<td><ul class="first last simple">
+<li>singlethread</li>
+<li>wavefront</li>
+<li>workgroup</li>
+<li>agent</li>
+<li>system</li>
+</ul>
+</td>
+<td><em>none</em></td>
+<td><em>Same as corresponding
+fence acq_rel,
+except must generate
+all instructions even
+for OpenCL.</em></td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+<p>The memory order also adds the single thread optimization constraints defined in
+table
+<a class="reference internal" href="#amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table"><em>AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9</em></a>.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-amdhsa-memory-model-single-thread-optimization-constraints-gfx6-gfx9-table">
+<caption>AMDHSA Memory Model Single Thread Optimization Constraints GFX6-GFX9</caption>
+<colgroup>
+<col width="16%" />
+<col width="84%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">LLVM Memory</th>
+<th class="head">Optimization Constraints</th>
+</tr>
+<tr class="row-even"><th class="head">Ordering</th>
+<th class="head"> </th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-odd"><td>unordered</td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-even"><td>monotonic</td>
+<td><em>none</em></td>
+</tr>
+<tr class="row-odd"><td>acquire</td>
+<td><ul class="first last simple">
+<li>If a load atomic/atomicrmw then no following load/load
+atomic/store/store atomic/atomicrmw/fence instruction can
+be moved before the acquire.</li>
+<li>If a fence then same as load atomic, plus no preceding
+associated fence-paired-atomic can be moved after the fence.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>release</td>
+<td><ul class="first last simple">
+<li>If a store atomic/atomicrmw then no preceding load/load
+atomic/store/store atomic/atomicrmw/fence instruction can
+be moved after the release.</li>
+<li>If a fence then same as store atomic, plus no following
+associated fence-paired-atomic can be moved before the
+fence.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-odd"><td>acq_rel</td>
+<td>Same constraints as both acquire and release.</td>
+</tr>
+<tr class="row-even"><td>seq_cst</td>
+<td><ul class="first last simple">
+<li>If a load atomic then same constraints as acquire, plus no
+preceding sequentially consistent load atomic/store
+atomic/atomicrmw/fence instruction can be moved after the
+seq_cst.</li>
+<li>If a store atomic then the same constraints as release, plus
+no following sequentially consistent load atomic/store
+atomic/atomicrmw/fence instruction can be moved before the
+seq_cst.</li>
+<li>If an atomicrmw/fence then same constraints as acq_rel.</li>
+</ul>
+</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="trap-handler-abi">
+<h4><a class="toc-backref" href="#id77">Trap Handler ABI</a><a class="headerlink" href="#trap-handler-abi" title="Permalink to this headline">¶</a></h4>
+<p>For code objects generated by the AMDGPU backend for HSA <a class="reference internal" href="#hsa">[HSA]</a> compatible runtimes
+(such as ROCm <a class="reference internal" href="#amd-rocm">[AMD-ROCm]</a>), the runtime installs a trap handler that supports
+the <tt class="docutils literal"><span class="pre">s_trap</span></tt> instruction with the following usage:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-trap-handler-for-amdhsa-os-table">
+<caption>AMDGPU Trap Handler for AMDHSA OS</caption>
+<colgroup>
+<col width="26%" />
+<col width="21%" />
+<col width="21%" />
+<col width="32%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Usage</th>
+<th class="head">Code Sequence</th>
+<th class="head">Trap Handler
+Inputs</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x00</span></tt></td>
+<td> </td>
+<td>Reserved by hardware.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">debugtrap(arg)</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x01</span></tt></td>
+<td><dl class="first last docutils">
+<dt><tt class="docutils literal"><span class="pre">SGPR0-1</span></tt>:</dt>
+<dd><tt class="docutils literal"><span class="pre">queue_ptr</span></tt></dd>
+<dt><tt class="docutils literal"><span class="pre">VGPR0</span></tt>:</dt>
+<dd><tt class="docutils literal"><span class="pre">arg</span></tt></dd>
+</dl>
+</td>
+<td>Reserved for HSA
+<tt class="docutils literal"><span class="pre">debugtrap</span></tt>
+intrinsic (not
+implemented).</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">llvm.trap</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x02</span></tt></td>
+<td><dl class="first last docutils">
+<dt><tt class="docutils literal"><span class="pre">SGPR0-1</span></tt>:</dt>
+<dd><tt class="docutils literal"><span class="pre">queue_ptr</span></tt></dd>
+</dl>
+</td>
+<td>Causes dispatch to be
+terminated and its
+associated queue put
+into the error state.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">llvm.debugtrap</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x03</span></tt></td>
+<td> </td>
+<td><ul class="first last simple">
+<li>If debugger not
+installed then
+behaves as a
+no-operation. The
+trap handler is
+entered and
+immediately returns
+to continue
+execution of the
+wavefront.</li>
+<li>If the debugger is
+installed, causes
+the debug trap to be
+reported by the
+debugger and the
+wavefront is put in
+the halt state until
+resumed by the
+debugger.</li>
+</ul>
+</td>
+</tr>
+<tr class="row-even"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x04</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-odd"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x05</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-even"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x06</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-odd"><td>debugger breakpoint</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x07</span></tt></td>
+<td> </td>
+<td>Reserved for debugger
+breakpoints.</td>
+</tr>
+<tr class="row-even"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0x08</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-odd"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0xfe</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+<tr class="row-even"><td>reserved</td>
+<td><tt class="docutils literal"><span class="pre">s_trap</span> <span class="pre">0xff</span></tt></td>
+<td> </td>
+<td>Reserved.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="amdpal">
+<h3><a class="toc-backref" href="#id78">AMDPAL</a><a class="headerlink" href="#amdpal" title="Permalink to this headline">¶</a></h3>
+<p>This section provides code conventions used when the target triple OS is
+<tt class="docutils literal"><span class="pre">amdpal</span></tt> (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>) for passing runtime parameters
+from the application/runtime to each invocation of a hardware shader. These
+parameters include both generic, application-controlled parameters called
+<em>user data</em> as well as system-generated parameters that are a product of the
+draw or dispatch execution.</p>
+<div class="section" id="user-data">
+<h4><a class="toc-backref" href="#id79">User Data</a><a class="headerlink" href="#user-data" title="Permalink to this headline">¶</a></h4>
+<p>Each hardware stage has a set of 32-bit <em>user data registers</em> which can be
+written from a command buffer and then loaded into SGPRs when waves are launched
+via a subsequent dispatch or draw operation. This is the way most arguments are
+passed from the application/runtime to a hardware shader.</p>
+</div>
+<div class="section" id="compute-user-data">
+<h4><a class="toc-backref" href="#id80">Compute User Data</a><a class="headerlink" href="#compute-user-data" title="Permalink to this headline">¶</a></h4>
+<p>Compute shader user data mappings are simpler than graphics shaders, and have a
+fixed mapping.</p>
+<p>Note that there are always 10 available <em>user data entries</em> in registers -
+entries beyond that limit must be fetched from memory (via the spill table
+pointer) by the shader.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="pal-compute-user-data-registers">
+<caption>PAL Compute Shader User Data Registers</caption>
+<colgroup>
+<col width="20%" />
+<col width="80%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">User Register</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>0</td>
+<td>Global Internal Table (32-bit pointer)</td>
+</tr>
+<tr class="row-odd"><td>1</td>
+<td>Per-Shader Internal Table (32-bit pointer)</td>
+</tr>
+<tr class="row-even"><td>2 - 11</td>
+<td>Application-Controlled User Data (10 32-bit values)</td>
+</tr>
+<tr class="row-odd"><td>12</td>
+<td>Spill Table (32-bit pointer)</td>
+</tr>
+<tr class="row-even"><td>13 - 14</td>
+<td>Thread Group Count (64-bit pointer)</td>
+</tr>
+<tr class="row-odd"><td>15</td>
+<td>GDS Range</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="graphics-user-data">
+<h4><a class="toc-backref" href="#id81">Graphics User Data</a><a class="headerlink" href="#graphics-user-data" title="Permalink to this headline">¶</a></h4>
+<p>Graphics pipelines support a much more flexible user data mapping:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="pal-graphics-user-data-registers">
+<caption>PAL Graphics Shader User Data Registers</caption>
+<colgroup>
+<col width="23%" />
+<col width="77%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">User Register</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>0</td>
+<td>Global Internal Table (32-bit pointer)</td>
+</tr>
+<tr class="row-odd"><td><ul class="first last simple">
+<li></li>
+</ul>
+</td>
+<td>Per-Shader Internal Table (32-bit pointer)</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li>1-15</li>
+</ul>
+</td>
+<td>Application Controlled User Data
+(1-15 Contiguous 32-bit Values in Registers)</td>
+</tr>
+<tr class="row-odd"><td><ul class="first last simple">
+<li></li>
+</ul>
+</td>
+<td>Spill Table (32-bit pointer)</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li></li>
+</ul>
+</td>
+<td>Draw Index (First Stage Only)</td>
+</tr>
+<tr class="row-odd"><td><ul class="first last simple">
+<li></li>
+</ul>
+</td>
+<td>Vertex Offset (First Stage Only)</td>
+</tr>
+<tr class="row-even"><td><ul class="first last simple">
+<li></li>
+</ul>
+</td>
+<td>Instance Offset (First Stage Only)</td>
+</tr>
+</tbody>
+</table>
+<p>The placement of the global internal table remains fixed in the first <em>user
+data SGPR register</em>. Otherwise all parameters are optional, and can be mapped
+to any desired <em>user data SGPR register</em>, with the following restrictions:</p>
+<ul class="simple">
+<li>Draw Index, Vertex Offset, and Instance Offset can only be used by the first
+active hardware stage in a graphics pipeline (i.e. where the API vertex
+shader runs).</li>
+<li>Application-controlled user data must be mapped into a contiguous range of
+user data registers.</li>
+<li>The application-controlled user data range supports compaction remapping, so
+only <em>entries</em> that are actually consumed by the shader must be assigned to
+corresponding <em>registers</em>. Note that in order to support an efficient runtime
+implementation, the remapping must pack <em>registers</em> in the same order as
+<em>entries</em>, with unused <em>entries</em> removed.</li>
+</ul>
+</div></blockquote>
+</div>
+<div class="section" id="global-internal-table">
+<span id="pal-global-internal-table"></span><h4><a class="toc-backref" href="#id82">Global Internal Table</a><a class="headerlink" href="#global-internal-table" title="Permalink to this headline">¶</a></h4>
+<p>The global internal table is a table of <em>shader resource descriptors</em> (SRDs) that
+define how certain engine-wide, runtime-managed resources should be accessed
+from a shader. The majority of these resources have HW-defined formats, and it
+is up to the compiler to write/read data as required by the target hardware.</p>
+<p>The following table illustrates the required format:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="pal-git-table">
+<caption>PAL Global Internal Table</caption>
+<colgroup>
+<col width="25%" />
+<col width="75%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Offset</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>0-3</td>
+<td>Graphics Scratch SRD</td>
+</tr>
+<tr class="row-odd"><td>4-7</td>
+<td>Compute Scratch SRD</td>
+</tr>
+<tr class="row-even"><td>8-11</td>
+<td>ES/GS Ring Output SRD</td>
+</tr>
+<tr class="row-odd"><td>12-15</td>
+<td>ES/GS Ring Input SRD</td>
+</tr>
+<tr class="row-even"><td>16-19</td>
+<td>GS/VS Ring Output #0</td>
+</tr>
+<tr class="row-odd"><td>20-23</td>
+<td>GS/VS Ring Output #1</td>
+</tr>
+<tr class="row-even"><td>24-27</td>
+<td>GS/VS Ring Output #2</td>
+</tr>
+<tr class="row-odd"><td>28-31</td>
+<td>GS/VS Ring Output #3</td>
+</tr>
+<tr class="row-even"><td>32-35</td>
+<td>GS/VS Ring Input SRD</td>
+</tr>
+<tr class="row-odd"><td>36-39</td>
+<td>Tessellation Factor Buffer SRD</td>
+</tr>
+<tr class="row-even"><td>40-43</td>
+<td>Off-Chip LDS Buffer SRD</td>
+</tr>
+<tr class="row-odd"><td>44-47</td>
+<td>Off-Chip Param Cache Buffer SRD</td>
+</tr>
+<tr class="row-even"><td>48-51</td>
+<td>Sample Position Buffer SRD</td>
+</tr>
+<tr class="row-odd"><td>52</td>
+<td>vaRange::ShadowDescriptorTable High Bits</td>
+</tr>
+</tbody>
+</table>
+<p>The pointer to the global internal table passed to the shader as user data
+is a 32-bit pointer. The top 32 bits should be assumed to be the same as
+the top 32 bits of the pipeline, so the shader may use the program
+counter’s top 32 bits.</p>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="unspecified-os">
+<h3><a class="toc-backref" href="#id83">Unspecified OS</a><a class="headerlink" href="#unspecified-os" title="Permalink to this headline">¶</a></h3>
+<p>This section provides code conventions used when the target triple OS is
+empty (see <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a>).</p>
+<div class="section" id="id35">
+<h4><a class="toc-backref" href="#id84">Trap Handler ABI</a><a class="headerlink" href="#id35" title="Permalink to this headline">¶</a></h4>
+<p>For code objects generated by the AMDGPU backend for non-amdhsa OS, the runtime does
+not install a trap handler. The <tt class="docutils literal"><span class="pre">llvm.trap</span></tt> and <tt class="docutils literal"><span class="pre">llvm.debugtrap</span></tt>
+instructions are handled as follows:</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdgpu-trap-handler-for-non-amdhsa-os-table">
+<caption>AMDGPU Trap Handler for Non-AMDHSA OS</caption>
+<colgroup>
+<col width="21%" />
+<col width="21%" />
+<col width="59%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Usage</th>
+<th class="head">Code Sequence</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>llvm.trap</td>
+<td>s_endpgm</td>
+<td>Causes wavefront to be terminated.</td>
+</tr>
+<tr class="row-odd"><td>llvm.debugtrap</td>
+<td><em>none</em></td>
+<td>Compiler warning given that there is no
+trap handler installed.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+</div>
+<div class="section" id="source-languages">
+<h2><a class="toc-backref" href="#id85">Source Languages</a><a class="headerlink" href="#source-languages" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="opencl">
+<span id="amdgpu-opencl"></span><h3><a class="toc-backref" href="#id86">OpenCL</a><a class="headerlink" href="#opencl" title="Permalink to this headline">¶</a></h3>
+<p>When the language is OpenCL the following differences occur:</p>
+<ol class="arabic simple">
+<li>The OpenCL memory model is used (see <a class="reference internal" href="#amdgpu-amdhsa-memory-model"><em>Memory Model</em></a>).</li>
+<li>The AMDGPU backend appends additional arguments to the kernel’s explicit
+arguments for the AMDHSA OS (see
+<a class="reference internal" href="#opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table"><em>OpenCL kernel implicit arguments appended for AMDHSA OS</em></a>).</li>
+<li>Additional metadata is generated
+(see <a class="reference internal" href="#amdgpu-amdhsa-code-object-metadata"><em>Code Object Metadata</em></a>).</li>
+</ol>
+<blockquote>
+<div><table border="1" class="docutils" id="opencl-kernel-implicit-arguments-appended-for-amdhsa-os-table">
+<caption>OpenCL kernel implicit arguments appended for AMDHSA OS</caption>
+<colgroup>
+<col width="13%" />
+<col width="6%" />
+<col width="14%" />
+<col width="67%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Position</th>
+<th class="head">Byte
+Size</th>
+<th class="head">Byte
+Alignment</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>1</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL Global Offset X</td>
+</tr>
+<tr class="row-odd"><td>2</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL Global Offset Y</td>
+</tr>
+<tr class="row-even"><td>3</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL Global Offset Z</td>
+</tr>
+<tr class="row-odd"><td>4</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL address of printf buffer</td>
+</tr>
+<tr class="row-even"><td>5</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL address of virtual queue used by
+enqueue_kernel.</td>
+</tr>
+<tr class="row-odd"><td>6</td>
+<td>8</td>
+<td>8</td>
+<td>OpenCL address of AqlWrap struct used by
+enqueue_kernel.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+<div class="section" id="hcc">
+<span id="amdgpu-hcc"></span><h3><a class="toc-backref" href="#id87">HCC</a><a class="headerlink" href="#hcc" title="Permalink to this headline">¶</a></h3>
+<p>When the language is HCC the following differences occur:</p>
+<ol class="arabic simple">
+<li>The HSA memory model is used (see <a class="reference internal" href="#amdgpu-amdhsa-memory-model"><em>Memory Model</em></a>).</li>
+</ol>
+</div>
+<div class="section" id="assembler">
+<span id="amdgpu-assembler"></span><h3><a class="toc-backref" href="#id88">Assembler</a><a class="headerlink" href="#assembler" title="Permalink to this headline">¶</a></h3>
+<p>The AMDGPU backend has an LLVM-MC based assembler which is currently in development.
+It supports AMDGCN GFX6-GFX9.</p>
+<p>This section describes general syntax for instructions and operands.</p>
+<div class="section" id="instructions">
+<h4><a class="toc-backref" href="#id89">Instructions</a><a class="headerlink" href="#instructions" title="Permalink to this headline">¶</a></h4>
+<div class="toctree-wrapper compound">
+</div>
+<p>An instruction has the following syntax:</p>
+<blockquote>
+<div><em>&lt;opcode&gt; &lt;operand0&gt;, &lt;operand1&gt;,... &lt;modifier0&gt; &lt;modifier1&gt;...</em></div></blockquote>
+<p>Note that operands are normally comma-separated while modifiers are space-separated.</p>
+<p>The order of operands and modifiers is fixed. Most modifiers are optional and may be omitted.</p>
+<p>See detailed instruction syntax description for <a class="reference internal" href="AMDGPUAsmGFX7.html"><em>GFX7</em></a>,
+<a class="reference internal" href="AMDGPUAsmGFX8.html"><em>GFX8</em></a> and <a class="reference internal" href="AMDGPUAsmGFX9.html"><em>GFX9</em></a>.</p>
+<p>Note that features under development are not included in this description.</p>
+<p>For more information about instructions, their semantics and supported combinations of
+operands, refer to one of the instruction set architecture manuals
+<a class="reference internal" href="#amd-gcn-gfx6">[AMD-GCN-GFX6]</a>, <a class="reference internal" href="#amd-gcn-gfx7">[AMD-GCN-GFX7]</a>, <a class="reference internal" href="#amd-gcn-gfx8">[AMD-GCN-GFX8]</a> and <a class="reference internal" href="#amd-gcn-gfx9">[AMD-GCN-GFX9]</a>.</p>
+</div>
+<div class="section" id="operands">
+<h4><a class="toc-backref" href="#id90">Operands</a><a class="headerlink" href="#operands" title="Permalink to this headline">¶</a></h4>
+<p>The following syntax for register operands is supported:</p>
+<ul class="simple">
+<li>SGPR registers: s0, ... or s[0], ...</li>
+<li>VGPR registers: v0, ... or v[0], ...</li>
+<li>TTMP registers: ttmp0, ... or ttmp[0], ...</li>
+<li>Special registers: exec (exec_lo, exec_hi), vcc (vcc_lo, vcc_hi), flat_scratch (flat_scratch_lo, flat_scratch_hi)</li>
+<li>Special trap registers: tba (tba_lo, tba_hi), tma (tma_lo, tma_hi)</li>
+<li>Register pairs, quads, etc: s[2:3], v[10:11], ttmp[5:6], s[4:7], v[12:15], ttmp[4:7], s[8:15], ...</li>
+<li>Register lists: [s0, s1], [ttmp0, ttmp1, ttmp2, ttmp3]</li>
+<li>Register index expressions: v[2*2], s[1-1:2-1]</li>
+<li>‘off’ indicates that an operand is not enabled</li>
+</ul>
+</div>
+<div class="section" id="modifiers">
+<h4><a class="toc-backref" href="#id91">Modifiers</a><a class="headerlink" href="#modifiers" title="Permalink to this headline">¶</a></h4>
+<p>Detailed description of modifiers may be found <a class="reference internal" href="AMDGPUOperandSyntax.html"><em>here</em></a>.</p>
+</div>
+<div class="section" id="instruction-examples">
+<h4><a class="toc-backref" href="#id92">Instruction Examples</a><a class="headerlink" href="#instruction-examples" title="Permalink to this headline">¶</a></h4>
+<div class="section" id="ds">
+<h5><a class="toc-backref" href="#id93">DS</a><a class="headerlink" href="#ds" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">ds_add_u32</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v4</span> <span class="nv">offset</span><span class="p">:</span><span class="mi">16</span>
+<span class="nf">ds_write_src2_b64</span> <span class="nv">v2</span> <span class="nv">offset0</span><span class="p">:</span><span class="mi">4</span> <span class="nv">offset1</span><span class="p">:</span><span class="mi">8</span>
+<span class="nf">ds_cmpst_f32</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v4</span><span class="p">,</span> <span class="nv">v6</span>
+<span class="nf">ds_min_rtn_f64</span> <span class="nv">v</span><span class="p">[</span><span class="mi">8</span><span class="p">:</span><span class="mi">9</span><span class="p">],</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “LDS/GDS instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="flat">
+<h5><a class="toc-backref" href="#id94">FLAT</a><a class="headerlink" href="#flat" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">flat_load_dword</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">4</span><span class="p">]</span>
+<span class="nf">flat_store_dwordx3</span> <span class="nv">v</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">4</span><span class="p">],</span> <span class="nv">v</span><span class="p">[</span><span class="mi">5</span><span class="p">:</span><span class="mi">7</span><span class="p">]</span>
+<span class="nf">flat_atomic_swap</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">4</span><span class="p">],</span> <span class="nv">v5</span> <span class="nv">glc</span>
+<span class="nf">flat_atomic_cmpswap</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">4</span><span class="p">],</span> <span class="nv">v</span><span class="p">[</span><span class="mi">5</span><span class="p">:</span><span class="mi">6</span><span class="p">]</span> <span class="nv">glc</span> <span class="nv">slc</span>
+<span class="nf">flat_atomic_fmax_x2</span> <span class="nv">v</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">2</span><span class="p">],</span> <span class="nv">v</span><span class="p">[</span><span class="mi">3</span><span class="p">:</span><span class="mi">4</span><span class="p">],</span> <span class="nv">v</span><span class="p">[</span><span class="mi">5</span><span class="p">:</span><span class="mi">6</span><span class="p">]</span> <span class="nv">glc</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “FLAT instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="mubuf">
+<h5><a class="toc-backref" href="#id95">MUBUF</a><a class="headerlink" href="#mubuf" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">buffer_load_dword</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">off</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">7</span><span class="p">],</span> <span class="nv">s1</span>
+<span class="nf">buffer_store_dwordx4</span> <span class="nv">v</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">4</span><span class="p">],</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">ttmp</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">7</span><span class="p">],</span> <span class="nv">s1</span> <span class="nv">offen</span> <span class="nv">offset</span><span class="p">:</span><span class="mi">4</span> <span class="nv">glc</span> <span class="nv">tfe</span>
+<span class="nf">buffer_store_format_xy</span> <span class="nv">v</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">2</span><span class="p">],</span> <span class="nv">off</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">7</span><span class="p">],</span> <span class="nv">s1</span>
+<span class="nf">buffer_wbinvl1</span>
+<span class="nf">buffer_atomic_inc</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">8</span><span class="p">:</span><span class="mi">11</span><span class="p">],</span> <span class="nv">s4</span> <span class="nv">idxen</span> <span class="nv">offset</span><span class="p">:</span><span class="mi">4</span> <span class="nv">slc</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “MUBUF Instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="smrd-smem">
+<h5><a class="toc-backref" href="#id96">SMRD/SMEM</a><a class="headerlink" href="#smrd-smem" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">s_load_dword</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="mh">0xfc</span>
+<span class="nf">s_load_dwordx8</span> <span class="nv">s</span><span class="p">[</span><span class="mi">8</span><span class="p">:</span><span class="mi">15</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s4</span>
+<span class="nf">s_load_dwordx16</span> <span class="nv">s</span><span class="p">[</span><span class="mi">88</span><span class="p">:</span><span class="mi">103</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s4</span>
+<span class="nf">s_dcache_inv_vol</span>
+<span class="nf">s_memtime</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “Scalar Memory Operations” in the ISA Manual.</p>
+</div>
+<div class="section" id="sop1">
+<h5><a class="toc-backref" href="#id97">SOP1</a><a class="headerlink" href="#sop1" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">s_mov_b32</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s2</span>
+<span class="nf">s_mov_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">1</span><span class="p">],</span> <span class="mh">0x80000000</span>
+<span class="nf">s_cmov_b32</span> <span class="nv">s1</span><span class="p">,</span> <span class="mi">200</span>
+<span class="nf">s_wqm_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
+<span class="nf">s_bcnt0_i32_b64</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
+<span class="nf">s_swappc_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
+<span class="nf">s_cbranch_join</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “SOP1 Instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="sop2">
+<h5><a class="toc-backref" href="#id98">SOP2</a><a class="headerlink" href="#sop2" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">s_add_u32</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s2</span><span class="p">,</span> <span class="nv">s3</span>
+<span class="nf">s_and_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">6</span><span class="p">:</span><span class="mi">7</span><span class="p">]</span>
+<span class="nf">s_cselect_b32</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s2</span><span class="p">,</span> <span class="nv">s3</span>
+<span class="nf">s_andn2_b32</span> <span class="nv">s2</span><span class="p">,</span> <span class="nv">s4</span><span class="p">,</span> <span class="nv">s6</span>
+<span class="nf">s_lshr_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">],</span> <span class="nv">s6</span>
+<span class="nf">s_ashr_i32</span> <span class="nv">s2</span><span class="p">,</span> <span class="nv">s4</span><span class="p">,</span> <span class="nv">s6</span>
+<span class="nf">s_bfm_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s4</span><span class="p">,</span> <span class="nv">s6</span>
+<span class="nf">s_bfe_i64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">],</span> <span class="nv">s6</span>
+<span class="nf">s_cbranch_g_fork</span> <span class="nv">s</span><span class="p">[</span><span class="mi">4</span><span class="p">:</span><span class="mi">5</span><span class="p">],</span> <span class="nv">s</span><span class="p">[</span><span class="mi">6</span><span class="p">:</span><span class="mi">7</span><span class="p">]</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “SOP2 Instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="sopc">
+<h5><a class="toc-backref" href="#id99">SOPC</a><a class="headerlink" href="#sopc" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">s_cmp_eq_i32</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s2</span>
+<span class="nf">s_bitcmp1_b32</span> <span class="nv">s1</span><span class="p">,</span> <span class="nv">s2</span>
+<span class="nf">s_bitcmp0_b64</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">],</span> <span class="nv">s4</span>
+<span class="nf">s_setvskip</span> <span class="nv">s3</span><span class="p">,</span> <span class="nv">s5</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “SOPC Instructions” in the ISA Manual.</p>
+</div>
+<div class="section" id="sopp">
+<h5><a class="toc-backref" href="#id100">SOPP</a><a class="headerlink" href="#sopp" title="Permalink to this headline">¶</a></h5>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">s_barrier</span>
+<span class="nf">s_nop</span> <span class="mi">2</span>
+<span class="nf">s_endpgm</span>
+<span class="nf">s_waitcnt</span> <span class="mi">0</span> <span class="c1">; Wait for all counters to be 0</span>
+<span class="nf">s_waitcnt</span> <span class="nv">vmcnt</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="o">&amp;</span> <span class="nv">expcnt</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="o">&amp;</span> <span class="nv">lgkmcnt</span><span class="p">(</span><span class="mi">0</span><span class="p">)</span> <span class="c1">; Equivalent to above</span>
+<span class="nf">s_waitcnt</span> <span class="nv">vmcnt</span><span class="p">(</span><span class="mi">1</span><span class="p">)</span> <span class="c1">; Wait for vmcnt counter to be 1.</span>
+<span class="nf">s_sethalt</span> <span class="mi">9</span>
+<span class="nf">s_sleep</span> <span class="mi">10</span>
+<span class="nf">s_sendmsg</span> <span class="mh">0x1</span>
+<span class="nf">s_sendmsg</span> <span class="nv">sendmsg</span><span class="p">(</span><span class="nv">MSG_INTERRUPT</span><span class="p">)</span>
+<span class="nf">s_trap</span> <span class="mi">1</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “SOPP Instructions” in the ISA Manual.</p>
+<p>Unless otherwise mentioned, little verification is performed on the operands
+of SOPP Instructions, so it is up to the programmer to be familiar with the
+range of acceptable values.</p>
+</div>
+<div class="section" id="valu">
+<h5><a class="toc-backref" href="#id101">VALU</a><a class="headerlink" href="#valu" title="Permalink to this headline">¶</a></h5>
+<p>For vector ALU instruction opcodes (VOP1, VOP2, VOP3, VOPC, VOP_DPP, VOP_SDWA),
+the assembler will automatically use optimal encoding based on its operands.
+To force specific encoding, one can add a suffix to the opcode of the instruction:</p>
+<ul class="simple">
+<li>_e32 for 32-bit VOP1/VOP2/VOPC</li>
+<li>_e64 for 64-bit VOP3</li>
+<li>_dpp for VOP_DPP</li>
+<li>_sdwa for VOP_SDWA</li>
+</ul>
+<p>VOP1/VOP2/VOP3/VOPC examples:</p>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">v_mov_b32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span>
+<span class="nf">v_mov_b32_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span>
+<span class="nf">v_nop</span>
+<span class="nf">v_cvt_f64_i32_e32</span> <span class="nv">v</span><span class="p">[</span><span class="mi">1</span><span class="p">:</span><span class="mi">2</span><span class="p">],</span> <span class="nv">v2</span>
+<span class="nf">v_floor_f32_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span>
+<span class="nf">v_bfrev_b32_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span>
+<span class="nf">v_add_f32_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v3</span>
+<span class="nf">v_mul_i32_i24_e64</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span><span class="p">,</span> <span class="mi">3</span>
+<span class="nf">v_mul_i32_i24_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="o">-</span><span class="mi">3</span><span class="p">,</span> <span class="nv">v3</span>
+<span class="nf">v_mul_i32_i24_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="o">-</span><span class="mi">100</span><span class="p">,</span> <span class="nv">v3</span>
+<span class="nf">v_addc_u32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">1</span><span class="p">],</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v3</span><span class="p">,</span> <span class="nv">s</span><span class="p">[</span><span class="mi">2</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
+<span class="nf">v_max_f16_e32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v3</span>
+</pre></div>
+</div>
+<p>VOP_DPP examples:</p>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">v_mov_b32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">quad_perm</span><span class="p">:[</span><span class="mi">0</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">]</span>
+<span class="nf">v_sin_f32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">row_shl</span><span class="p">:</span><span class="mi">1</span> <span class="nv">row_mask</span><span class="p">:</span><span class="mh">0xa</span> <span class="nv">bank_mask</span><span class="p">:</span><span class="mh">0x1</span> <span class="nv">bound_ctrl</span><span class="p">:</span><span class="mi">0</span>
+<span class="nf">v_mov_b32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">wave_shl</span><span class="p">:</span><span class="mi">1</span>
+<span class="nf">v_mov_b32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">row_mirror</span>
+<span class="nf">v_mov_b32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">row_bcast</span><span class="p">:</span><span class="mi">31</span>
+<span class="nf">v_mov_b32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nv">quad_perm</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">1</span><span class="p">]</span> <span class="nv">row_mask</span><span class="p">:</span><span class="mh">0xa</span> <span class="nv">bank_mask</span><span class="p">:</span><span class="mh">0x1</span> <span class="nv">bound_ctrl</span><span class="p">:</span><span class="mi">0</span>
+<span class="nf">v_add_f32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span><span class="p">,</span> <span class="o">|</span><span class="nv">v0</span><span class="o">|</span> <span class="nv">row_shl</span><span class="p">:</span><span class="mi">1</span> <span class="nv">row_mask</span><span class="p">:</span><span class="mh">0xa</span> <span class="nv">bank_mask</span><span class="p">:</span><span class="mh">0x1</span> <span class="nv">bound_ctrl</span><span class="p">:</span><span class="mi">0</span>
+<span class="nf">v_max_f16</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span><span class="p">,</span> <span class="nv">v3</span> <span class="nv">row_shl</span><span class="p">:</span><span class="mi">1</span> <span class="nv">row_mask</span><span class="p">:</span><span class="mh">0xa</span> <span class="nv">bank_mask</span><span class="p">:</span><span class="mh">0x1</span> <span class="nv">bound_ctrl</span><span class="p">:</span><span class="mi">0</span>
+</pre></div>
+</div>
+<p>VOP_SDWA examples:</p>
+<div class="highlight-nasm"><div class="highlight"><pre><span class="nf">v_mov_b32</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span> <span class="nb">ds</span><span class="nv">t_sel</span><span class="p">:</span><span class="kt">BYTE</span><span class="nv">_0</span> <span class="nb">ds</span><span class="nv">t_unused</span><span class="p">:</span><span class="nv">UNUSED_PRESERVE</span> <span class="nv">src0_sel</span><span class="p">:</span><span class="kt">DWORD</span>
+<span class="nf">v_min_u32</span> <span class="nv">v200</span><span class="p">,</span> <span class="nv">v200</span><span class="p">,</span> <span class="nv">v1</span> <span class="nb">ds</span><span class="nv">t_sel</span><span class="p">:</span><span class="kt">WORD</span><span class="nv">_1</span> <span class="nb">ds</span><span class="nv">t_unused</span><span class="p">:</span><span class="nv">UNUSED_PAD</span> <span class="nv">src0_sel</span><span class="p">:</span><span class="kt">BYTE</span><span class="nv">_1</span> <span class="nv">src1_sel</span><span class="p">:</span><span class="kt">DWORD</span>
+<span class="nf">v_sin_f32</span> <span class="nv">v0</span><span class="p">,</span> <span class="nv">v0</span> <span class="nb">ds</span><span class="nv">t_unused</span><span class="p">:</span><span class="nv">UNUSED_PAD</span> <span class="nv">src0_sel</span><span class="p">:</span><span class="kt">WORD</span><span class="nv">_1</span>
+<span class="nf">v_fract_f32</span> <span class="nv">v0</span><span class="p">,</span> <span class="o">|</span><span class="nv">v0</span><span class="o">|</span> <span class="nb">ds</span><span class="nv">t_sel</span><span class="p">:</span><span class="kt">DWORD</span> <span class="nb">ds</span><span class="nv">t_unused</span><span class="p">:</span><span class="nv">UNUSED_PAD</span> <span class="nv">src0_sel</span><span class="p">:</span><span class="kt">WORD</span><span class="nv">_1</span>
+<span class="nf">v_cmpx_le_u32</span> <span class="nv">vcc</span><span class="p">,</span> <span class="nv">v1</span><span class="p">,</span> <span class="nv">v2</span> <span class="nv">src0_sel</span><span class="p">:</span><span class="kt">BYTE</span><span class="nv">_2</span> <span class="nv">src1_sel</span><span class="p">:</span><span class="kt">WORD</span><span class="nv">_0</span>
+</pre></div>
+</div>
+<p>For a full list of supported instructions, refer to “Vector ALU instructions” in the ISA Manual.</p>
+</div>
+</div>
+<div class="section" id="hsa-code-object-directives">
+<h4><a class="toc-backref" href="#id102">HSA Code Object Directives</a><a class="headerlink" href="#hsa-code-object-directives" title="Permalink to this headline">¶</a></h4>
+<p>The AMDGPU ABI defines auxiliary data in the output code object. In assembly source,
+one can specify them with assembler directives.</p>
+<div class="section" id="hsa-code-object-version-major-minor">
+<h5><a class="toc-backref" href="#id103">.hsa_code_object_version major, minor</a><a class="headerlink" href="#hsa-code-object-version-major-minor" title="Permalink to this headline">¶</a></h5>
+<p><em>major</em> and <em>minor</em> are integers that specify the version of the HSA code
+object that will be generated by the assembler.</p>
+</div>
+<div class="section" id="hsa-code-object-isa-major-minor-stepping-vendor-arch">
+<h5><a class="toc-backref" href="#id104">.hsa_code_object_isa [major, minor, stepping, vendor, arch]</a><a class="headerlink" href="#hsa-code-object-isa-major-minor-stepping-vendor-arch" title="Permalink to this headline">¶</a></h5>
+<p><em>major</em>, <em>minor</em>, and <em>stepping</em> are all integers that describe the instruction
+set architecture (ISA) version of the assembly program.</p>
+<p><em>vendor</em> and <em>arch</em> are quoted strings. <em>vendor</em> should always be equal to
+“AMD” and <em>arch</em> should always be equal to “AMDGPU”.</p>
+<p>By default, the assembler will derive the ISA version, <em>vendor</em>, and <em>arch</em>
+from the value of the -mcpu option that is passed to the assembler.</p>
+</div>
+<div class="section" id="amdgpu-hsa-kernel-name">
+<h5><a class="toc-backref" href="#id105">.amdgpu_hsa_kernel (name)</a><a class="headerlink" href="#amdgpu-hsa-kernel-name" title="Permalink to this headline">¶</a></h5>
+<p>This directive specifies that the symbol with the given name is a kernel entry point
+(label) and the object should contain corresponding symbol of type STT_AMDGPU_HSA_KERNEL.</p>
+</div>
+<div class="section" id="amd-kernel-code-t">
+<h5><a class="toc-backref" href="#id106">.amd_kernel_code_t</a><a class="headerlink" href="#amd-kernel-code-t" title="Permalink to this headline">¶</a></h5>
+<p>This directive marks the beginning of a list of key / value pairs that are used
+to specify the amd_kernel_code_t object that will be emitted by the assembler.
+The list must be terminated by the <em>.end_amd_kernel_code_t</em> directive. For
+any amd_kernel_code_t values that are unspecified a default value will be
+used. The default value for all keys is 0, with the following exceptions:</p>
+<ul class="simple">
+<li><em>kernel_code_version_major</em> defaults to 1.</li>
+<li><em>machine_kind</em> defaults to 1.</li>
+<li><em>machine_version_major</em>, <em>machine_version_minor</em>, and
+<em>machine_version_stepping</em> are derived from the value of the -mcpu option
+that is passed to the assembler.</li>
+<li><em>kernel_code_entry_byte_offset</em> defaults to 256.</li>
+<li><em>wavefront_size</em> defaults to 6.</li>
+<li><em>kernarg_segment_alignment</em>, <em>group_segment_alignment</em>, and
+<em>private_segment_alignment</em> default to 4. Note that alignments are specified
+as a power of two, so a value of <strong>n</strong> means an alignment of 2^ <strong>n</strong>.</li>
+</ul>
+<p>The <em>.amd_kernel_code_t</em> directive must be placed immediately after the
+function label and before any instructions.</p>
+<p>For a full list of amd_kernel_code_t keys, refer to AMDGPU ABI document,
+comments in lib/Target/AMDGPU/AmdKernelCodeT.h and test/CodeGen/AMDGPU/hsa.s.</p>
+<p>Here is an example of a minimal amd_kernel_code_t specification:</p>
+<div class="highlight-none"><div class="highlight"><pre>.hsa_code_object_version 1,0
+.hsa_code_object_isa
+
+.hsatext
+.globl hello_world
+.p2align 8
+.amdgpu_hsa_kernel hello_world
+
+hello_world:
+
+ .amd_kernel_code_t
+ enable_sgpr_kernarg_segment_ptr = 1
+ is_ptr64 = 1
+ compute_pgm_rsrc1_vgprs = 0
+ compute_pgm_rsrc1_sgprs = 0
+ compute_pgm_rsrc2_user_sgpr = 2
+ kernarg_segment_byte_size = 8
+ wavefront_sgpr_count = 2
+ workitem_vgpr_count = 3
+ .end_amd_kernel_code_t
+
+ s_load_dwordx2 s[0:1], s[0:1] 0x0
+ v_mov_b32 v0, 3.14159
+ s_waitcnt lgkmcnt(0)
+ v_mov_b32 v1, s0
+ v_mov_b32 v2, s1
+ flat_store_dword v[1:2], v0
+ s_endpgm
+.Lfunc_end0:
+ .size hello_world, .Lfunc_end0-hello_world
+</pre></div>
+</div>
+</div>
+</div>
+<div class="section" id="predefined-symbols-mattr-code-object-v3">
+<h4><a class="toc-backref" href="#id107">Predefined Symbols (-mattr=+code-object-v3)</a><a class="headerlink" href="#predefined-symbols-mattr-code-object-v3" title="Permalink to this headline">¶</a></h4>
+<p>The AMDGPU assembler defines and updates some symbols automatically. These
+symbols do not affect code generation.</p>
+<div class="section" id="amdgcn-gfx-generation-number">
+<h5><a class="toc-backref" href="#id108">.amdgcn.gfx_generation_number</a><a class="headerlink" href="#amdgcn-gfx-generation-number" title="Permalink to this headline">¶</a></h5>
+<p>Set to the GFX generation number of the target being assembled for. For
+example, when assembling for a “GFX9” target this will be set to the integer
+value “9”. The possible GFX generation numbers are presented in
+<a class="reference internal" href="#amdgpu-processors"><em>Processors</em></a>.</p>
+</div>
+<div class="section" id="amdgcn-next-free-vgpr">
+<h5><a class="toc-backref" href="#id109">.amdgcn.next_free_vgpr</a><a class="headerlink" href="#amdgcn-next-free-vgpr" title="Permalink to this headline">¶</a></h5>
+<p>Set to zero before assembly begins. At each instruction, if the current value
+of this symbol is less than or equal to the maximum VGPR number explicitly
+referenced within that instruction then the symbol value is updated to equal
+that VGPR number plus one.</p>
+<p>May be used to set the <cite>.amdhsa_next_free_vgpr</cite> directive in
+<a class="reference internal" href="#amdhsa-kernel-directives-table"><em>AMDHSA Kernel Assembler Directives</em></a>.</p>
+<p>May be set at any time, e.g. manually set to zero at the start of each kernel.</p>
+</div>
+<div class="section" id="amdgcn-next-free-sgpr">
+<h5><a class="toc-backref" href="#id110">.amdgcn.next_free_sgpr</a><a class="headerlink" href="#amdgcn-next-free-sgpr" title="Permalink to this headline">¶</a></h5>
+<p>Set to zero before assembly begins. At each instruction, if the current value
+of this symbol is less than or equal to the maximum SGPR number explicitly
+referenced within that instruction then the symbol value is updated to equal
+that SGPR number plus one.</p>
+<p>May be used to set the <cite>.amdhsa_next_free_sgpr</cite> directive in
+<a class="reference internal" href="#amdhsa-kernel-directives-table"><em>AMDHSA Kernel Assembler Directives</em></a>.</p>
+<p>May be set at any time, e.g. manually set to zero at the start of each kernel.</p>
+</div>
+</div>
+<div class="section" id="code-object-directives-mattr-code-object-v3">
+<h4><a class="toc-backref" href="#id111">Code Object Directives (-mattr=+code-object-v3)</a><a class="headerlink" href="#code-object-directives-mattr-code-object-v3" title="Permalink to this headline">¶</a></h4>
+<p>Directives which begin with <tt class="docutils literal"><span class="pre">.amdgcn</span></tt> are valid for all <tt class="docutils literal"><span class="pre">amdgcn</span></tt>
+architecture processors, and are not OS-specific. Directives which begin with
+<tt class="docutils literal"><span class="pre">.amdhsa</span></tt> are specific to <tt class="docutils literal"><span class="pre">amdgcn</span></tt> architecture processors when the
+<tt class="docutils literal"><span class="pre">amdhsa</span></tt> OS is specified. See <a class="reference internal" href="#amdgpu-target-triples"><em>Target Triples</em></a> and
+<a class="reference internal" href="#amdgpu-processors"><em>Processors</em></a>.</p>
+<div class="section" id="amdgcn-target-target">
+<h5><a class="toc-backref" href="#id112">.amdgcn_target &lt;target&gt;</a><a class="headerlink" href="#amdgcn-target-target" title="Permalink to this headline">¶</a></h5>
+<p>Optional directive which declares the target supported by the containing
+assembler source file. Valid values are described in
+<a class="reference internal" href="#amdgpu-amdhsa-code-object-target-identification"><em>Code Object Target Identification</em></a>. Used by the assembler
+to validate command-line options such as <tt class="docutils literal"><span class="pre">-triple</span></tt>, <tt class="docutils literal"><span class="pre">-mcpu</span></tt>, and those
+which specify target features.</p>
+</div>
+<div class="section" id="amdhsa-kernel-name">
+<h5><a class="toc-backref" href="#id113">.amdhsa_kernel &lt;name&gt;</a><a class="headerlink" href="#amdhsa-kernel-name" title="Permalink to this headline">¶</a></h5>
+<p>Creates a correctly aligned AMDHSA kernel descriptor and a symbol,
+<tt class="docutils literal"><span class="pre">&lt;name&gt;.kd</span></tt>, in the current location of the current section. Only valid when
+the OS is <tt class="docutils literal"><span class="pre">amdhsa</span></tt>. <tt class="docutils literal"><span class="pre">&lt;name&gt;</span></tt> must be a symbol that labels the first
+instruction to execute, and does not need to be previously defined.</p>
+<p>Marks the beginning of a list of directives used to generate the bytes of a
+kernel descriptor, as described in <a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>.
+Directives which may appear in this list are described in
+<a class="reference internal" href="#amdhsa-kernel-directives-table"><em>AMDHSA Kernel Assembler Directives</em></a>. Directives may appear in any order, must
+be valid for the target being assembled for, and cannot be repeated. Directives
+support the range of values specified by the field they reference in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor"><em>Kernel Descriptor</em></a>. If a directive is not specified, it is
+assumed to have its default value, unless it is marked as “Required”, in which
+case it is an error to omit the directive. This list of directives is
+terminated by an <tt class="docutils literal"><span class="pre">.end_amdhsa_kernel</span></tt> directive.</p>
+<blockquote>
+<div><table border="1" class="docutils" id="amdhsa-kernel-directives-table">
+<caption>AMDHSA Kernel Assembler Directives</caption>
+<colgroup>
+<col width="35%" />
+<col width="10%" />
+<col width="8%" />
+<col width="47%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head">Directive</th>
+<th class="head">Default</th>
+<th class="head">Supported On</th>
+<th class="head">Description</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_group_segment_fixed_size</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls GROUP_SEGMENT_FIXED_SIZE in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_private_segment_fixed_size</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls PRIVATE_SEGMENT_FIXED_SIZE in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_private_segment_buffer</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_dispatch_ptr</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_DISPATCH_PTR in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_queue_ptr</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_QUEUE_PTR in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_kernarg_segment_ptr</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_KERNARG_SEGMENT_PTR in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_dispatch_id</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_DISPATCH_ID in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_flat_scratch_init</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_user_sgpr_private_segment_size</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in
+<a class="reference internal" href="#amdgpu-amdhsa-kernel-descriptor-gfx6-gfx9-table"><em>Kernel Descriptor for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_sgpr_private_segment_wavefront_offset</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_sgpr_workgroup_id_x</span></tt></td>
+<td>1</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_WORKGROUP_ID_X in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_sgpr_workgroup_id_y</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_WORKGROUP_ID_Y in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_sgpr_workgroup_id_z</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_WORKGROUP_ID_Z in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_sgpr_workgroup_info</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_SGPR_WORKGROUP_INFO in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_system_vgpr_workitem_id</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_VGPR_WORKITEM_ID in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.
+Possible values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table"><em>System VGPR Work-Item ID Enumeration Values</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_next_free_vgpr</span></tt></td>
+<td>Required</td>
+<td>GFX6-GFX9</td>
+<td>Maximum VGPR number explicitly referenced, plus one.
+Used to calculate GRANULATED_WORKITEM_VGPR_COUNT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_next_free_sgpr</span></tt></td>
+<td>Required</td>
+<td>GFX6-GFX9</td>
+<td>Maximum SGPR number explicitly referenced, plus one.
+Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_reserve_vcc</span></tt></td>
+<td>1</td>
+<td>GFX6-GFX9</td>
+<td>Whether the kernel may use the special VCC SGPR.
+Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_reserve_flat_scratch</span></tt></td>
+<td>1</td>
+<td>GFX7-GFX9</td>
+<td>Whether the kernel may use flat instructions to access
+scratch memory. Used to calculate
+GRANULATED_WAVEFRONT_SGPR_COUNT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_reserve_xnack_mask</span></tt></td>
+<td>Target
+Feature
+Specific
+(+xnack)</td>
+<td>GFX8-GFX9</td>
+<td>Whether the kernel may trigger XNACK replay.
+Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_float_round_mode_32</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls FLOAT_ROUND_MODE_32 in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.
+Possible values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table"><em>Floating Point Rounding Mode Enumeration Values</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_float_round_mode_16_64</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls FLOAT_ROUND_MODE_16_64 in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.
+Possible values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table"><em>Floating Point Rounding Mode Enumeration Values</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_float_denorm_mode_32</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls FLOAT_DENORM_MODE_32 in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.
+Possible values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table"><em>Floating Point Denorm Mode Enumeration Values</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_float_denorm_mode_16_64</span></tt></td>
+<td>3</td>
+<td>GFX6-GFX9</td>
+<td>Controls FLOAT_DENORM_MODE_16_64 in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.
+Possible values are defined in
+<a class="reference internal" href="#amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table"><em>Floating Point Denorm Mode Enumeration Values</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_dx10_clamp</span></tt></td>
+<td>1</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_DX10_CLAMP in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_ieee_mode</span></tt></td>
+<td>1</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_IEEE_MODE in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_fp16_overflow</span></tt></td>
+<td>0</td>
+<td>GFX9</td>
+<td>Controls FP16_OVFL in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc1-gfx6-gfx9-table"><em>compute_pgm_rsrc1 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_ieee_invalid_op</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_denorm_src</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_ieee_div_zero</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_ieee_overflow</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_ieee_underflow</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-even"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_fp_ieee_inexact</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_IEEE_754_FP_INEXACT in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+<tr class="row-odd"><td><tt class="docutils literal"><span class="pre">.amdhsa_exception_int_div_zero</span></tt></td>
+<td>0</td>
+<td>GFX6-GFX9</td>
+<td>Controls ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO in
+<a class="reference internal" href="#amdgpu-amdhsa-compute-pgm-rsrc2-gfx6-gfx9-table"><em>compute_pgm_rsrc2 for GFX6-GFX9</em></a>.</td>
+</tr>
+</tbody>
+</table>
+</div></blockquote>
+</div>
+</div>
+<div class="section" id="example-hsa-source-code-mattr-code-object-v3">
+<h4><a class="toc-backref" href="#id114">Example HSA Source Code (-mattr=+code-object-v3)</a><a class="headerlink" href="#example-hsa-source-code-mattr-code-object-v3" title="Permalink to this headline">¶</a></h4>
+<p>Here is an example of a minimal assembly source file, defining one HSA kernel:</p>
+<div class="highlight-nasm"><pre>.amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack" // optional
+
+.text
+.globl hello_world
+.p2align 8
+.type hello_world,@function
+hello_world:
+ s_load_dwordx2 s[0:1], s[0:1] 0x0
+ v_mov_b32 v0, 3.14159
+ s_waitcnt lgkmcnt(0)
+ v_mov_b32 v1, s0
+ v_mov_b32 v2, s1
+ flat_store_dword v[1:2], v0
+ s_endpgm
+.Lfunc_end0:
+ .size hello_world, .Lfunc_end0-hello_world
+
+.rodata
+.p2align 6
+.amdhsa_kernel hello_world
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
+ .amdhsa_next_free_sgpr .amdgcn.next_free_sgpr
+.end_amdhsa_kernel</pre>
+</div>
+</div>
+</div>
+</div>
+<div class="section" id="additional-documentation">
+<h2><a class="toc-backref" href="#id115">Additional Documentation</a><a class="headerlink" href="#additional-documentation" title="Permalink to this headline">¶</a></h2>
+<table class="docutils citation" frame="void" id="amd-radeon-hd-2000-3000" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id3">[AMD-RADEON-HD-2000-3000]</a></td><td><a class="reference external" href="http://developer.amd.com/wordpress/media/2012/10/R600_Instruction_Set_Architecture.pdf">AMD R6xx shader ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-radeon-hd-4000" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id4">[AMD-RADEON-HD-4000]</a></td><td><a class="reference external" href="http://developer.amd.com/wordpress/media/2012/10/R700-Family_Instruction_Set_Architecture.pdf">AMD R7xx shader ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-radeon-hd-5000" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id5">[AMD-RADEON-HD-5000]</a></td><td><a class="reference external" href="http://developer.amd.com/wordpress/media/2012/10/AMD_Evergreen-Family_Instruction_Set_Architecture.pdf">AMD Evergreen shader ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-radeon-hd-6000" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id6">[AMD-RADEON-HD-6000]</a></td><td><a class="reference external" href="http://developer.amd.com/wordpress/media/2012/10/AMD_HD_6900_Series_Instruction_Set_Architecture.pdf">AMD Cayman/Trinity shader ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-gcn-gfx6" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-GCN-GFX6]</td><td><em>(<a class="fn-backref" href="#id7">1</a>, <a class="fn-backref" href="#id36">2</a>)</em> <a class="reference external" href="http://developer.amd.com/wordpress/media/2012/12/AMD_Southern_Islands_Instruction_Set_Architecture.pdf">AMD Southern Islands Series ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-gcn-gfx7" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-GCN-GFX7]</td><td><em>(<a class="fn-backref" href="#id8">1</a>, <a class="fn-backref" href="#id37">2</a>)</em> <a class="reference external" href="http://developer.amd.com/wordpress/media/2013/07/AMD_Sea_Islands_Instruction_Set_Architecture.pdf">AMD Sea Islands Series ISA</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-gcn-gfx8" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-GCN-GFX8]</td><td><em>(<a class="fn-backref" href="#id9">1</a>, <a class="fn-backref" href="#id38">2</a>)</em> <a class="reference external" href="http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_GCN3_Instruction_Set_Architecture_rev1.1.pdf">AMD GCN3 Instruction Set Architecture</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-gcn-gfx9" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-GCN-GFX9]</td><td><em>(<a class="fn-backref" href="#id10">1</a>, <a class="fn-backref" href="#id39">2</a>)</em> <a class="reference external" href="http://developer.amd.com/wordpress/media/2013/12/Vega_Shader_ISA_28July2017.pdf">AMD “Vega” Instruction Set Architecture</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-rocm" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-ROCm]</td><td><em>(<a class="fn-backref" href="#id2">1</a>, <a class="fn-backref" href="#id21">2</a>, <a class="fn-backref" href="#id24">3</a>, <a class="fn-backref" href="#id34">4</a>)</em> <a class="reference external" href="http://gpuopen.com/compute-product/rocm/">ROCm: Open Platform for Development, Discovery and Education Around GPU Computing</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="amd-rocm-github" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[AMD-ROCm-github]</td><td><em>(<a class="fn-backref" href="#id29">1</a>, <a class="fn-backref" href="#id30">2</a>)</em> <a class="reference external" href="http://github.com/RadeonOpenCompute">ROCm github</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="hsa" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[HSA]</td><td><em>(<a class="fn-backref" href="#id1">1</a>, <a class="fn-backref" href="#id11">2</a>, <a class="fn-backref" href="#id20">3</a>, <a class="fn-backref" href="#id23">4</a>, <a class="fn-backref" href="#id26">5</a>, <a class="fn-backref" href="#id27">6</a>, <a class="fn-backref" href="#id28">7</a>, <a class="fn-backref" href="#id31">8</a>, <a class="fn-backref" href="#id33">9</a>)</em> <a class="reference external" href="http://www.hsafoundation.com/">Heterogeneous System Architecture (HSA) Foundation</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="elf" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[ELF]</td><td><em>(<a class="fn-backref" href="#id18">1</a>, <a class="fn-backref" href="#id19">2</a>)</em> <a class="reference external" href="http://www.sco.com/developers/gabi/">Executable and Linkable Format (ELF)</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="id40" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id22">[DWARF]</a></td><td><a class="reference external" href="http://dwarfstd.org/">DWARF Debugging Information Format</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="yaml" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id25">[YAML]</a></td><td><a class="reference external" href="http://www.yaml.org/spec/1.2/spec.html">YAML Ain’t Markup Language (YAML™) Version 1.2</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="id41" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[OpenCL]</td><td><em>(<a class="fn-backref" href="#id13">1</a>, <a class="fn-backref" href="#id32">2</a>)</em> <a class="reference external" href="http://www.khronos.org/registry/cl/specs/opencl-2.0.pdf">The OpenCL Specification Version 2.0</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="hrf" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id12">[HRF]</a></td><td><a class="reference external" href="http://benedictgaster.org/wp-content/uploads/2014/01/asplos269-FINAL.pdf">Heterogeneous-race-free Memory Models</a></td></tr>
+</tbody>
+</table>
+<table class="docutils citation" frame="void" id="clang-attr" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label">[CLANG-ATTR]</td><td><em>(<a class="fn-backref" href="#id14">1</a>, <a class="fn-backref" href="#id15">2</a>, <a class="fn-backref" href="#id16">3</a>, <a class="fn-backref" href="#id17">4</a>)</em> <a class="reference external" href="http://clang.llvm.org/docs/AttributeReference.html">Attributes in Clang</a></td></tr>
+</tbody>
+</table>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="AMDGPUAsmGFX7.html" title="Syntax of GFX7 Instructions"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="NVPTXUsage.html" title="User Guide for NVPTX Back-end"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/AdvancedBuilds.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/AdvancedBuilds.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/AdvancedBuilds.html (added)
+++ www-releases/trunk/7.0.1/docs/AdvancedBuilds.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,240 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Advanced Build Configurations — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="How To Build On ARM" href="HowToBuildOnARM.html" />
+ <link rel="prev" title="CMake Primer" href="CMakePrimer.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="HowToBuildOnARM.html" title="How To Build On ARM"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="CMakePrimer.html" title="CMake Primer"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="advanced-build-configurations">
+<h1>Advanced Build Configurations<a class="headerlink" href="#advanced-build-configurations" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#introduction" id="id1">Introduction</a></li>
+<li><a class="reference internal" href="#bootstrap-builds" id="id2">Bootstrap Builds</a></li>
+<li><a class="reference internal" href="#apple-clang-builds-a-more-complex-bootstrap" id="id3">Apple Clang Builds (A More Complex Bootstrap)</a></li>
+<li><a class="reference internal" href="#multi-stage-pgo" id="id4">Multi-stage PGO</a></li>
+<li><a class="reference internal" href="#stage-non-determinism" id="id5">3-Stage Non-Determinism</a></li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2><a class="toc-backref" href="#id1">Introduction</a><a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p><a class="reference external" href="http://www.cmake.org/">CMake</a> is a cross-platform build-generator tool. CMake
+does not build the project, it generates the files needed by your build tool
+(GNU make, Visual Studio, etc.) for building LLVM.</p>
+<p>If <strong>you are a new contributor</strong>, please start with the <a class="reference internal" href="GettingStarted.html"><em>Getting Started with the LLVM System</em></a> or
+<a class="reference internal" href="CMake.html"><em>Building LLVM with CMake</em></a> pages. This page is intended for users doing more complex builds.</p>
+<p>Many of the examples below are written assuming specific CMake Generators.
+Unless otherwise explicitly called out these commands should work with any CMake
+generator.</p>
+</div>
+<div class="section" id="bootstrap-builds">
+<h2><a class="toc-backref" href="#id2">Bootstrap Builds</a><a class="headerlink" href="#bootstrap-builds" title="Permalink to this headline">¶</a></h2>
+<p>The Clang CMake build system supports bootstrap (aka multi-stage) builds. At a
+high level a multi-stage build is a chain of builds that pass data from one
+stage into the next. The most common and simple version of this is a traditional
+bootstrap build.</p>
+<p>In a simple two-stage bootstrap build, we build clang using the system compiler,
+then use that just-built clang to build clang again. In CMake this simplest form
+of a bootstrap build can be configured with a single option,
+CLANG_ENABLE_BOOTSTRAP.</p>
+<div class="highlight-console"><div class="highlight"><pre><span class="gp">$</span> cmake -G Ninja -DCLANG_ENABLE_BOOTSTRAP<span class="o">=</span>On &lt;path to <span class="nb">source</span>&gt;
+<span class="gp">$</span> ninja stage2
+</pre></div>
+</div>
+<p>This command itself isn’t terribly useful because it assumes default
+configurations for each stage. The next series of examples utilize CMake cache
+scripts to provide more complex options.</p>
+<p>The clang build system refers to builds as stages. A stage1 build is a standard
+build using the compiler installed on the host, and a stage2 build is built
+using the stage1 compiler. This nomenclature holds up to more stages too. In
+general a stage<em>n</em> build is built using the output from stage<em>n-1</em>.</p>
+</div>
+<div class="section" id="apple-clang-builds-a-more-complex-bootstrap">
+<h2><a class="toc-backref" href="#id3">Apple Clang Builds (A More Complex Bootstrap)</a><a class="headerlink" href="#apple-clang-builds-a-more-complex-bootstrap" title="Permalink to this headline">¶</a></h2>
+<p>Apple’s Clang builds are a slightly more complicated example of the simple
+bootstrapping scenario. Apple Clang is built using a 2-stage build.</p>
+<p>The stage1 compiler is a host-only compiler with some options set. The stage1
+compiler is a balance of optimization vs build time because it is a throwaway.
+The stage2 compiler is the fully optimized compiler intended to ship to users.</p>
+<p>Setting up these compilers requires a lot of options. To simplify the
+configuration the Apple Clang build settings are contained in CMake Cache files.
+You can build an Apple Clang compiler using the following commands:</p>
+<div class="highlight-console"><div class="highlight"><pre><span class="gp">$</span> cmake -G Ninja -C <path to clang>/cmake/caches/Apple-stage1.cmake <path to <span class="nb">source</span>>
+<span class="gp">$</span> ninja stage2-distribution
+</pre></div>
+</div>
+<p>This CMake invocation configures the stage1 host compiler, and sets
+CLANG_BOOTSTRAP_CMAKE_ARGS to pass the Apple-stage2.cmake cache script to the
+stage2 configuration step.</p>
+<p>When you build the stage2-distribution target it builds the minimal stage1
+compiler and required tools, then configures and builds the stage2 compiler
+based on the settings in Apple-stage2.cmake.</p>
+<p>This pattern of using cache scripts to set complex settings, and specifically to
+make later stage builds include cache scripts is common in our more advanced
+build configurations.</p>
+</div>
+<div class="section" id="multi-stage-pgo">
+<h2><a class="toc-backref" href="#id4">Multi-stage PGO</a><a class="headerlink" href="#multi-stage-pgo" title="Permalink to this headline">¶</a></h2>
+<p>Profile-Guided Optimization (PGO) is a really great way to optimize the code
+clang generates. Our multi-stage PGO builds are a workflow for generating PGO
+profiles that can be used to optimize clang.</p>
+<p>At a high level, the way PGO works is that you build an instrumented compiler,
+then you run the instrumented compiler against sample source files. While the
+instrumented compiler runs it will output a bunch of files containing
+performance counters (.profraw files). After generating all the profraw files
+you use llvm-profdata to merge the files into a single profdata file that you
+can feed into the LLVM_PROFDATA_FILE option.</p>
+<p>Our PGO.cmake cache script automates that whole process. You can use it by
+running:</p>
+<div class="highlight-console"><div class="highlight"><pre><span class="gp">$</span> cmake -G Ninja -C <path_to_clang>/cmake/caches/PGO.cmake <<span class="nb">source </span>dir>
+<span class="gp">$</span> ninja stage2-instrumented-generate-profdata
+</pre></div>
+</div>
+<p>If you let that run for a few hours or so, it will place a profdata file in your
+build directory. This takes a really long time because it builds clang twice,
+and you <em>must</em> have compiler-rt in your build tree.</p>
+<p>This process uses any source files under the perf-training directory as training
+data as long as the source files are marked up with LIT-style RUN lines.</p>
+<p>After it finishes you can use “find . -name clang.profdata” to find it, but it
+should be at a path something like:</p>
+<div class="highlight-console"><div class="highlight"><pre><span class="go"><build dir>/tools/clang/stage2-instrumented-bins/utils/perf-training/clang.profdata</span>
+</pre></div>
+</div>
+<p>You can feed that file into the LLVM_PROFDATA_FILE option when you build your
+optimized compiler.</p>
+<p>The PGO CMake cache has a slightly different stage naming scheme than other
+multi-stage builds. It generates three stages; stage1, stage2-instrumented, and
+stage2. Both of the stage2 builds are built using the stage1 compiler.</p>
+<p>The PGO CMake cache generates the following additional targets:</p>
+<dl class="docutils">
+<dt><strong>stage2-instrumented</strong></dt>
+<dd>Builds a stage1 x86 compiler, runtime, and required tools (llvm-config,
+llvm-profdata) then uses that compiler to build an instrumented stage2 compiler.</dd>
+<dt><strong>stage2-instrumented-generate-profdata</strong></dt>
+<dd>Depends on “stage2-instrumented” and will use the instrumented compiler to
+generate profdata based on the training files in <clang>/utils/perf-training</dd>
+<dt><strong>stage2</strong></dt>
+<dd>Depends on “stage2-instrumented-generate-profdata” and will use the stage1
+compiler with the stage2 profdata to build a PGO-optimized compiler.</dd>
+<dt><strong>stage2-check-llvm</strong></dt>
+<dd>Depends on stage2 and runs check-llvm using the stage2 compiler.</dd>
+<dt><strong>stage2-check-clang</strong></dt>
+<dd>Depends on stage2 and runs check-clang using the stage2 compiler.</dd>
+<dt><strong>stage2-check-all</strong></dt>
+<dd>Depends on stage2 and runs check-all using the stage2 compiler.</dd>
+<dt><strong>stage2-test-suite</strong></dt>
+<dd>Depends on stage2 and runs the test-suite using the stage2 compiler (requires
+in-tree test-suite).</dd>
+</dl>
+</div>
+<div class="section" id="stage-non-determinism">
+<h2><a class="toc-backref" href="#id5">3-Stage Non-Determinism</a><a class="headerlink" href="#stage-non-determinism" title="Permalink to this headline">¶</a></h2>
+<p>In the ancient lore of compilers non-determinism is like the multi-headed hydra.
+Whenever its head pops up, terror and chaos ensue.</p>
+<p>Historically one of the tests to verify that a compiler was deterministic would
+be a three stage build. The idea of a three stage build is you take your sources
+and build a compiler (stage1), then use that compiler to rebuild the sources
+(stage2), then you use that compiler to rebuild the sources a third time
+(stage3) with an identical configuration to the stage2 build. At the end of
+this, you have a stage2 and stage3 compiler that should be bit-for-bit
+identical.</p>
+<p>You can perform one of these 3-stage builds with LLVM & clang using the
+following commands:</p>
+<div class="highlight-console"><div class="highlight"><pre><span class="gp">$</span> cmake -G Ninja -C <path_to_clang>/cmake/caches/3-stage.cmake <<span class="nb">source </span>dir>
+<span class="gp">$</span> ninja stage3
+</pre></div>
+</div>
+<p>After the build you can compare the stage2 & stage3 compilers. We have a bot
+set up <a class="reference external" href="http://lab.llvm.org:8011/builders/clang-3stage-ubuntu">here</a> that runs
+this build and compare configuration.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="HowToBuildOnARM.html" title="How To Build On ARM"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="CMakePrimer.html" title="CMake Primer"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/AliasAnalysis.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/AliasAnalysis.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/AliasAnalysis.html (added)
+++ www-releases/trunk/7.0.1/docs/AliasAnalysis.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,749 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>LLVM Alias Analysis Infrastructure — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="MemorySSA" href="MemorySSA.html" />
+ <link rel="prev" title="Using -opt-bisect-limit to debug optimization errors" href="OptBisect.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="MemorySSA.html" title="MemorySSA"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="OptBisect.html" title="Using -opt-bisect-limit to debug optimization errors"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="llvm-alias-analysis-infrastructure">
+<h1>LLVM Alias Analysis Infrastructure<a class="headerlink" href="#llvm-alias-analysis-infrastructure" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#introduction" id="id1">Introduction</a></li>
+<li><a class="reference internal" href="#aliasanalysis-class-overview" id="id2"><tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> Class Overview</a><ul>
+<li><a class="reference internal" href="#representation-of-pointers" id="id3">Representation of Pointers</a></li>
+<li><a class="reference internal" href="#the-alias-method" id="id4">The <tt class="docutils literal"><span class="pre">alias</span></tt> method</a><ul>
+<li><a class="reference internal" href="#must-may-and-no-alias-responses" id="id5">Must, May, and No Alias Responses</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#the-getmodrefinfo-methods" id="id6">The <tt class="docutils literal"><span class="pre">getModRefInfo</span></tt> methods</a></li>
+<li><a class="reference internal" href="#other-useful-aliasanalysis-methods" id="id7">Other useful <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> methods</a><ul>
+<li><a class="reference internal" href="#the-pointstoconstantmemory-method" id="id8">The <tt class="docutils literal"><span class="pre">pointsToConstantMemory</span></tt> method</a></li>
+<li><a class="reference internal" href="#the-doesnotaccessmemory-and-onlyreadsmemory-methods" id="id9">The <tt class="docutils literal"><span class="pre">doesNotAccessMemory</span></tt> and <tt class="docutils literal"><span class="pre">onlyReadsMemory</span></tt> methods</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#writing-a-new-aliasanalysis-implementation" id="id10">Writing a new <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> Implementation</a><ul>
+<li><a class="reference internal" href="#different-pass-styles" id="id11">Different Pass styles</a></li>
+<li><a class="reference internal" href="#required-initialization-calls" id="id12">Required initialization calls</a></li>
+<li><a class="reference internal" href="#required-methods-to-override" id="id13">Required methods to override</a></li>
+<li><a class="reference internal" href="#interfaces-which-may-be-specified" id="id14">Interfaces which may be specified</a></li>
+<li><a class="reference internal" href="#aliasanalysis-chaining-behavior" id="id15"><tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> chaining behavior</a></li>
+<li><a class="reference internal" href="#updating-analysis-results-for-transformations" id="id16">Updating analysis results for transformations</a><ul>
+<li><a class="reference internal" href="#the-deletevalue-method" id="id17">The <tt class="docutils literal"><span class="pre">deleteValue</span></tt> method</a></li>
+<li><a class="reference internal" href="#the-copyvalue-method" id="id18">The <tt class="docutils literal"><span class="pre">copyValue</span></tt> method</a></li>
+<li><a class="reference internal" href="#the-replacewithnewvalue-method" id="id19">The <tt class="docutils literal"><span class="pre">replaceWithNewValue</span></tt> method</a></li>
+<li><a class="reference internal" href="#the-addescapinguse-method" id="id20">The <tt class="docutils literal"><span class="pre">addEscapingUse</span></tt> method</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#efficiency-issues" id="id21">Efficiency Issues</a></li>
+<li><a class="reference internal" href="#limitations" id="id22">Limitations</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#using-alias-analysis-results" id="id23">Using alias analysis results</a><ul>
+<li><a class="reference internal" href="#using-the-memorydependenceanalysis-pass" id="id24">Using the <tt class="docutils literal"><span class="pre">MemoryDependenceAnalysis</span></tt> Pass</a></li>
+<li><a class="reference internal" href="#using-the-aliassettracker-class" id="id25">Using the <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt> class</a><ul>
+<li><a class="reference internal" href="#the-aliassettracker-implementation" id="id26">The AliasSetTracker implementation</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#using-the-aliasanalysis-interface-directly" id="id27">Using the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface directly</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#existing-alias-analysis-implementations-and-clients" id="id28">Existing alias analysis implementations and clients</a><ul>
+<li><a class="reference internal" href="#available-aliasanalysis-implementations" id="id29">Available <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> implementations</a><ul>
+<li><a class="reference internal" href="#the-no-aa-pass" id="id30">The <tt class="docutils literal"><span class="pre">-no-aa</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-basicaa-pass" id="id31">The <tt class="docutils literal"><span class="pre">-basicaa</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-globalsmodref-aa-pass" id="id32">The <tt class="docutils literal"><span class="pre">-globalsmodref-aa</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-steens-aa-pass" id="id33">The <tt class="docutils literal"><span class="pre">-steens-aa</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-ds-aa-pass" id="id34">The <tt class="docutils literal"><span class="pre">-ds-aa</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-scev-aa-pass" id="id35">The <tt class="docutils literal"><span class="pre">-scev-aa</span></tt> pass</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#alias-analysis-driven-transformations" id="id36">Alias analysis driven transformations</a><ul>
+<li><a class="reference internal" href="#the-adce-pass" id="id37">The <tt class="docutils literal"><span class="pre">-adce</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-licm-pass" id="id38">The <tt class="docutils literal"><span class="pre">-licm</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-argpromotion-pass" id="id39">The <tt class="docutils literal"><span class="pre">-argpromotion</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-gvn-memcpyopt-and-dse-passes" id="id40">The <tt class="docutils literal"><span class="pre">-gvn</span></tt>, <tt class="docutils literal"><span class="pre">-memcpyopt</span></tt>, and <tt class="docutils literal"><span class="pre">-dse</span></tt> passes</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#clients-for-debugging-and-evaluation-of-implementations" id="id41">Clients for debugging and evaluation of implementations</a><ul>
+<li><a class="reference internal" href="#the-print-alias-sets-pass" id="id42">The <tt class="docutils literal"><span class="pre">-print-alias-sets</span></tt> pass</a></li>
+<li><a class="reference internal" href="#the-aa-eval-pass" id="id43">The <tt class="docutils literal"><span class="pre">-aa-eval</span></tt> pass</a></li>
+</ul>
+</li>
+</ul>
+</li>
+<li><a class="reference internal" href="#memory-dependence-analysis" id="id44">Memory Dependence Analysis</a></li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2><a class="toc-backref" href="#id1">Introduction</a><a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p>Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt to
+determine whether or not two pointers ever can point to the same object in
+memory. There are many different algorithms for alias analysis and many
+different ways of classifying them: flow-sensitive vs. flow-insensitive,
+context-sensitive vs. context-insensitive, field-sensitive
+vs. field-insensitive, unification-based vs. subset-based, etc. Traditionally,
+alias analyses respond to a query with a <a class="reference internal" href="#must-may-or-no">Must, May, or No</a> alias response,
+indicating that two pointers always point to the same object, might point to the
+same object, or are known to never point to the same object.</p>
+<p>The LLVM <a class="reference external" href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a> class is the
+primary interface used by clients and implementations of alias analyses in the
+LLVM system. This class is the common interface between clients of alias
+analysis information and the implementations providing it, and is designed to
+support a wide range of implementations and clients (but currently all clients
+are assumed to be flow-insensitive). In addition to simple alias analysis
+information, this class exposes Mod/Ref information from those implementations
+which can provide it, allowing for powerful analyses and transformations to work
+well together.</p>
+<p>This document contains information necessary to successfully implement this
+interface, use it, and to test both sides. It also explains some of the finer
+points about what exactly results mean.</p>
+</div>
+<div class="section" id="aliasanalysis-class-overview">
+<h2><a class="toc-backref" href="#id2"><tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> Class Overview</a><a class="headerlink" href="#aliasanalysis-class-overview" title="Permalink to this headline">¶</a></h2>
+<p>The <a class="reference external" href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a>
+class defines the interface that the various alias analysis implementations
+should support. This class exports two important enums: <tt class="docutils literal"><span class="pre">AliasResult</span></tt> and
+<tt class="docutils literal"><span class="pre">ModRefResult</span></tt> which represent the result of an alias query or a mod/ref
+query, respectively.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface exposes information about memory, represented in
+several different ways. In particular, memory objects are represented as a
+starting address and size, and function calls are represented as the actual
+<tt class="docutils literal"><span class="pre">call</span></tt> or <tt class="docutils literal"><span class="pre">invoke</span></tt> instructions that perform the call. The
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface also exposes some helper methods which allow you to
+get mod/ref information for arbitrary instructions.</p>
+<p>All <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interfaces require that in queries involving multiple
+values, values which are not <a class="reference internal" href="LangRef.html#constants"><em>constants</em></a> are all
+defined within the same function.</p>
+<div class="section" id="representation-of-pointers">
+<h3><a class="toc-backref" href="#id3">Representation of Pointers</a><a class="headerlink" href="#representation-of-pointers" title="Permalink to this headline">¶</a></h3>
+<p>Most importantly, the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> class provides several methods which are
+used to query whether or not two memory objects alias, whether function calls
+can modify or read a memory object, etc. For all of these queries, memory
+objects are represented as a pair of their starting address (a symbolic LLVM
+<tt class="docutils literal"><span class="pre">Value*</span></tt>) and a static size.</p>
+<p>Representing memory objects as a starting address and a size is critically
+important for correct Alias Analyses. For example, consider this (silly, but
+possible) C code:</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="kt">int</span> <span class="n">i</span><span class="p">;</span>
+<span class="kt">char</span> <span class="n">C</span><span class="p">[</span><span class="mi">2</span><span class="p">];</span>
+<span class="kt">char</span> <span class="n">A</span><span class="p">[</span><span class="mi">10</span><span class="p">];</span>
+<span class="cm">/* ... */</span>
+<span class="k">for</span> <span class="p">(</span><span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">!=</span> <span class="mi">10</span><span class="p">;</span> <span class="o">++</span><span class="n">i</span><span class="p">)</span> <span class="p">{</span>
+ <span class="n">C</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">];</span> <span class="cm">/* One byte store */</span>
+ <span class="n">C</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">A</span><span class="p">[</span><span class="mi">9</span><span class="o">-</span><span class="n">i</span><span class="p">];</span> <span class="cm">/* One byte store */</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>In this case, the <tt class="docutils literal"><span class="pre">basicaa</span></tt> pass will disambiguate the stores to <tt class="docutils literal"><span class="pre">C[0]</span></tt> and
+<tt class="docutils literal"><span class="pre">C[1]</span></tt> because they are accesses to two distinct locations one byte apart, and
+the accesses are each one byte. In this case, the Loop Invariant Code Motion
+(LICM) pass can use store motion to remove the stores from the loop. In
+contrast, the following code:</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="kt">int</span> <span class="n">i</span><span class="p">;</span>
+<span class="kt">char</span> <span class="n">C</span><span class="p">[</span><span class="mi">2</span><span class="p">];</span>
+<span class="kt">char</span> <span class="n">A</span><span class="p">[</span><span class="mi">10</span><span class="p">];</span>
+<span class="cm">/* ... */</span>
+<span class="k">for</span> <span class="p">(</span><span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">!=</span> <span class="mi">10</span><span class="p">;</span> <span class="o">++</span><span class="n">i</span><span class="p">)</span> <span class="p">{</span>
+ <span class="p">((</span><span class="kt">short</span><span class="o">*</span><span class="p">)</span><span class="n">C</span><span class="p">)[</span><span class="mi">0</span><span class="p">]</span> <span class="o">=</span> <span class="n">A</span><span class="p">[</span><span class="n">i</span><span class="p">];</span> <span class="cm">/* Two byte store! */</span>
+ <span class="n">C</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> <span class="o">=</span> <span class="n">A</span><span class="p">[</span><span class="mi">9</span><span class="o">-</span><span class="n">i</span><span class="p">];</span> <span class="cm">/* One byte store */</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>In this case, the two stores to C do alias each other, because the access to the
+<tt class="docutils literal"><span class="pre">&C[0]</span></tt> element is a two byte access. If size information wasn’t available in
+the query, even the first case would have to conservatively assume that the
+accesses alias.</p>
+</div>
+<div class="section" id="the-alias-method">
+<span id="alias"></span><h3><a class="toc-backref" href="#id4">The <tt class="docutils literal"><span class="pre">alias</span></tt> method</a><a class="headerlink" href="#the-alias-method" title="Permalink to this headline">¶</a></h3>
+<p>The <tt class="docutils literal"><span class="pre">alias</span></tt> method is the primary interface used to determine whether or not
+two memory objects alias each other. It takes two memory objects as input and
+returns MustAlias, PartialAlias, MayAlias, or NoAlias as appropriate.</p>
+<p>Like all <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interfaces, the <tt class="docutils literal"><span class="pre">alias</span></tt> method requires that either
+the two pointer values be defined within the same function, or at least one of
+the values is a <a class="reference internal" href="LangRef.html#constants"><em>constant</em></a>.</p>
+<div class="section" id="must-may-and-no-alias-responses">
+<span id="must-may-or-no"></span><h4><a class="toc-backref" href="#id5">Must, May, and No Alias Responses</a><a class="headerlink" href="#must-may-and-no-alias-responses" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">NoAlias</span></tt> response may be used when there is never an immediate dependence
+between any memory reference <em>based</em> on one pointer and any memory reference
+<em>based</em> on the other. The most obvious example is when the two pointers point to
+non-overlapping memory ranges. Another is when the two pointers are only ever
+used for reading memory. Another is when the memory is freed and reallocated
+between accesses through one pointer and accesses through the other — in this
+case, there is a dependence, but it’s mediated by the free and reallocation.</p>
+<p>An exception to this is the <a class="reference internal" href="LangRef.html#noalias"><em>noalias</em></a> keyword;
+the “irrelevant” dependencies are ignored.</p>
+<p>The <tt class="docutils literal"><span class="pre">MayAlias</span></tt> response is used whenever the two pointers might refer to the
+same object.</p>
+<p>The <tt class="docutils literal"><span class="pre">PartialAlias</span></tt> response is used when the two memory objects are known to
+be overlapping in some way, regardless of whether they start at the same address
+or not.</p>
+<p>The <tt class="docutils literal"><span class="pre">MustAlias</span></tt> response may only be returned if the two memory objects are
+guaranteed to always start at exactly the same location. A <tt class="docutils literal"><span class="pre">MustAlias</span></tt>
+response does not imply that the pointers compare equal.</p>
+</div>
+</div>
+<div class="section" id="the-getmodrefinfo-methods">
+<h3><a class="toc-backref" href="#id6">The <tt class="docutils literal"><span class="pre">getModRefInfo</span></tt> methods</a><a class="headerlink" href="#the-getmodrefinfo-methods" title="Permalink to this headline">¶</a></h3>
+<p>The <tt class="docutils literal"><span class="pre">getModRefInfo</span></tt> methods return information about whether the execution of
+an instruction can read or modify a memory location. Mod/Ref information is
+always conservative: if an instruction <strong>might</strong> read or write a location,
+<tt class="docutils literal"><span class="pre">ModRef</span></tt> is returned.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> class also provides a <tt class="docutils literal"><span class="pre">getModRefInfo</span></tt> method for testing
+dependencies between function calls. This method takes two call sites (<tt class="docutils literal"><span class="pre">CS1</span></tt>
+& <tt class="docutils literal"><span class="pre">CS2</span></tt>), returns <tt class="docutils literal"><span class="pre">NoModRef</span></tt> if neither call writes to memory read or
+written by the other, <tt class="docutils literal"><span class="pre">Ref</span></tt> if <tt class="docutils literal"><span class="pre">CS1</span></tt> reads memory written by <tt class="docutils literal"><span class="pre">CS2</span></tt>,
+<tt class="docutils literal"><span class="pre">Mod</span></tt> if <tt class="docutils literal"><span class="pre">CS1</span></tt> writes to memory read or written by <tt class="docutils literal"><span class="pre">CS2</span></tt>, or <tt class="docutils literal"><span class="pre">ModRef</span></tt> if
+<tt class="docutils literal"><span class="pre">CS1</span></tt> might read or write memory written to by <tt class="docutils literal"><span class="pre">CS2</span></tt>. Note that this
+relation is not commutative.</p>
+</div>
+<div class="section" id="other-useful-aliasanalysis-methods">
+<h3><a class="toc-backref" href="#id7">Other useful <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> methods</a><a class="headerlink" href="#other-useful-aliasanalysis-methods" title="Permalink to this headline">¶</a></h3>
+<p>Several other tidbits of information are often collected by various alias
+analysis implementations and can be put to good use by various clients.</p>
+<div class="section" id="the-pointstoconstantmemory-method">
+<h4><a class="toc-backref" href="#id8">The <tt class="docutils literal"><span class="pre">pointsToConstantMemory</span></tt> method</a><a class="headerlink" href="#the-pointstoconstantmemory-method" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">pointsToConstantMemory</span></tt> method returns true if and only if the analysis
+can prove that the pointer only points to unchanging memory locations
+(functions, constant global variables, and the null pointer). This information
+can be used to refine mod/ref information: it is impossible for an unchanging
+memory location to be modified.</p>
+</div>
+<div class="section" id="the-doesnotaccessmemory-and-onlyreadsmemory-methods">
+<span id="never-access-memory-or-only-read-memory"></span><h4><a class="toc-backref" href="#id9">The <tt class="docutils literal"><span class="pre">doesNotAccessMemory</span></tt> and <tt class="docutils literal"><span class="pre">onlyReadsMemory</span></tt> methods</a><a class="headerlink" href="#the-doesnotaccessmemory-and-onlyreadsmemory-methods" title="Permalink to this headline">¶</a></h4>
+<p>These methods are used to provide very simple mod/ref information for function
+calls. The <tt class="docutils literal"><span class="pre">doesNotAccessMemory</span></tt> method returns true for a function if the
+analysis can prove that the function never reads or writes to memory, or if the
+function only reads from constant memory. Functions with this property are
+side-effect free and only depend on their input arguments, allowing them to be
+eliminated if they form common subexpressions or be hoisted out of loops. Many
+common functions behave this way (e.g., <tt class="docutils literal"><span class="pre">sin</span></tt> and <tt class="docutils literal"><span class="pre">cos</span></tt>) but many others do
+not (e.g., <tt class="docutils literal"><span class="pre">acos</span></tt>, which modifies the <tt class="docutils literal"><span class="pre">errno</span></tt> variable).</p>
+<p>The <tt class="docutils literal"><span class="pre">onlyReadsMemory</span></tt> method returns true for a function if analysis can prove
+that (at most) the function only reads from non-volatile memory. Functions with
+this property are side-effect free, only depending on their input arguments and
+the state of memory when they are called. This property allows calls to these
+functions to be eliminated and moved around, as long as there is no store
+instruction that changes the contents of memory. Note that all functions that
+satisfy the <tt class="docutils literal"><span class="pre">doesNotAccessMemory</span></tt> method also satisfy <tt class="docutils literal"><span class="pre">onlyReadsMemory</span></tt>.</p>
+</div>
+</div>
+</div>
+<div class="section" id="writing-a-new-aliasanalysis-implementation">
+<h2><a class="toc-backref" href="#id10">Writing a new <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> Implementation</a><a class="headerlink" href="#writing-a-new-aliasanalysis-implementation" title="Permalink to this headline">¶</a></h2>
+<p>Writing a new alias analysis implementation for LLVM is quite straight-forward.
+There are already several implementations that you can use for examples, and the
+following information should help fill in any details. For examples, take a
+look at the <a class="reference internal" href="#various-alias-analysis-implementations">various alias analysis implementations</a> included with LLVM.</p>
+<div class="section" id="different-pass-styles">
+<h3><a class="toc-backref" href="#id11">Different Pass styles</a><a class="headerlink" href="#different-pass-styles" title="Permalink to this headline">¶</a></h3>
+<p>The first step is to determine what type of <a class="reference internal" href="WritingAnLLVMPass.html"><em>LLVM pass</em></a>
+you need to use for your Alias Analysis. As is the case with most other
+analyses and transformations, the answer should be fairly obvious from what type
+of problem you are trying to solve:</p>
+<ol class="arabic simple">
+<li>If you require interprocedural analysis, it should be a <tt class="docutils literal"><span class="pre">Pass</span></tt>.</li>
+<li>If you are a function-local analysis, subclass <tt class="docutils literal"><span class="pre">FunctionPass</span></tt>.</li>
+<li>If you don’t need to look at the program at all, subclass <tt class="docutils literal"><span class="pre">ImmutablePass</span></tt>.</li>
+</ol>
+<p>In addition to the pass that you subclass, you should also inherit from the
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface, of course, and use the <tt class="docutils literal"><span class="pre">RegisterAnalysisGroup</span></tt>
+template to register as an implementation of <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt>.</p>
+</div>
+<div class="section" id="required-initialization-calls">
+<h3><a class="toc-backref" href="#id12">Required initialization calls</a><a class="headerlink" href="#required-initialization-calls" title="Permalink to this headline">¶</a></h3>
+<p>Your subclass of <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> is required to invoke two methods on the
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> base class: <tt class="docutils literal"><span class="pre">getAnalysisUsage</span></tt> and
+<tt class="docutils literal"><span class="pre">InitializeAliasAnalysis</span></tt>. In particular, your implementation of
+<tt class="docutils literal"><span class="pre">getAnalysisUsage</span></tt> should explicitly call into the
+<tt class="docutils literal"><span class="pre">AliasAnalysis::getAnalysisUsage</span></tt> method in addition to declaring
+any pass dependencies your pass has. Thus you should have something like this:</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="kt">void</span> <span class="n">getAnalysisUsage</span><span class="p">(</span><span class="n">AnalysisUsage</span> <span class="o">&amp;</span><span class="n">AU</span><span class="p">)</span> <span class="k">const</span> <span class="p">{</span>
+ <span class="n">AliasAnalysis</span><span class="o">::</span><span class="n">getAnalysisUsage</span><span class="p">(</span><span class="n">AU</span><span class="p">);</span>
+ <span class="c1">// declare your dependencies here.</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>Additionally, you must invoke the <tt class="docutils literal"><span class="pre">InitializeAliasAnalysis</span></tt> method from your
+analysis run method (<tt class="docutils literal"><span class="pre">run</span></tt> for a <tt class="docutils literal"><span class="pre">Pass</span></tt>, <tt class="docutils literal"><span class="pre">runOnFunction</span></tt> for a
+<tt class="docutils literal"><span class="pre">FunctionPass</span></tt>, or <tt class="docutils literal"><span class="pre">InitializePass</span></tt> for an <tt class="docutils literal"><span class="pre">ImmutablePass</span></tt>). For example
+(as part of a <tt class="docutils literal"><span class="pre">Pass</span></tt>):</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="kt">bool</span> <span class="n">run</span><span class="p">(</span><span class="n">Module</span> <span class="o">&amp;</span><span class="n">M</span><span class="p">)</span> <span class="p">{</span>
+ <span class="n">InitializeAliasAnalysis</span><span class="p">(</span><span class="k">this</span><span class="p">);</span>
+ <span class="c1">// Perform analysis here...</span>
+ <span class="k">return</span> <span class="kc">false</span><span class="p">;</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="required-methods-to-override">
+<h3><a class="toc-backref" href="#id13">Required methods to override</a><a class="headerlink" href="#required-methods-to-override" title="Permalink to this headline">¶</a></h3>
+<p>You must override the <tt class="docutils literal"><span class="pre">getAdjustedAnalysisPointer</span></tt> method on all subclasses
+of <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt>. An example implementation of this method would look like:</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="kt">void</span> <span class="o">*</span><span class="n">getAdjustedAnalysisPointer</span><span class="p">(</span><span class="k">const</span> <span class="kt">void</span><span class="o">*</span> <span class="n">ID</span><span class="p">)</span> <span class="n">override</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">ID</span> <span class="o">==</span> <span class="o">&amp;</span><span class="n">AliasAnalysis</span><span class="o">::</span><span class="n">ID</span><span class="p">)</span>
+ <span class="k">return</span> <span class="p">(</span><span class="n">AliasAnalysis</span><span class="o">*</span><span class="p">)</span><span class="k">this</span><span class="p">;</span>
+ <span class="k">return</span> <span class="k">this</span><span class="p">;</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+</div>
+<div class="section" id="interfaces-which-may-be-specified">
+<h3><a class="toc-backref" href="#id14">Interfaces which may be specified</a><a class="headerlink" href="#interfaces-which-may-be-specified" title="Permalink to this headline">¶</a></h3>
+<p>All of the <a class="reference external" href="http://llvm.org/doxygen/classllvm_1_1AliasAnalysis.html">AliasAnalysis</a> virtual methods
+default to providing <a class="reference internal" href="#aliasanalysis-chaining"><em>chaining</em></a> to another alias
+analysis implementation, which ends up returning conservatively correct
+information (returning “May” Alias and “Mod/Ref” for alias and mod/ref queries
+respectively). Depending on the capabilities of the analysis you are
+implementing, you just override the interfaces you can improve.</p>
+</div>
+<div class="section" id="aliasanalysis-chaining-behavior">
+<span id="aliasanalysis-chaining"></span><h3><a class="toc-backref" href="#id15"><tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> chaining behavior</a><a class="headerlink" href="#aliasanalysis-chaining-behavior" title="Permalink to this headline">¶</a></h3>
+<p>With only one special exception (the <a class="reference internal" href="#aliasanalysis-no-aa"><em>-no-aa</em></a> pass)
+every alias analysis pass chains to another alias analysis implementation (for
+example, the user can specify “<tt class="docutils literal"><span class="pre">-basicaa</span> <span class="pre">-ds-aa</span> <span class="pre">-licm</span></tt>” to get the maximum
+benefit from both alias analyses). The alias analysis class automatically
+takes care of most of this for methods that you don’t override. For methods
+that you do override, in code paths that return a conservative MayAlias or
+Mod/Ref result, simply return whatever the superclass computes. For example:</p>
+<div class="highlight-c++"><div class="highlight"><pre><span class="n">AliasResult</span> <span class="n">alias</span><span class="p">(</span><span class="k">const</span> <span class="n">Value</span> <span class="o">*</span><span class="n">V1</span><span class="p">,</span> <span class="kt">unsigned</span> <span class="n">V1Size</span><span class="p">,</span>
+ <span class="k">const</span> <span class="n">Value</span> <span class="o">*</span><span class="n">V2</span><span class="p">,</span> <span class="kt">unsigned</span> <span class="n">V2Size</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(...)</span>
+ <span class="k">return</span> <span class="n">NoAlias</span><span class="p">;</span>
+ <span class="p">...</span>
+
+ <span class="c1">// Couldn't determine a must or no-alias result.</span>
+ <span class="k">return</span> <span class="n">AliasAnalysis</span><span class="o">::</span><span class="n">alias</span><span class="p">(</span><span class="n">V1</span><span class="p">,</span> <span class="n">V1Size</span><span class="p">,</span> <span class="n">V2</span><span class="p">,</span> <span class="n">V2Size</span><span class="p">);</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>In addition to analysis queries, you must make sure to unconditionally pass LLVM
+<a class="reference internal" href="#update-notification">update notification</a> methods to the superclass as well if you override them,
+which allows all alias analyses in a chain to be updated.</p>
+</div>
+<div class="section" id="updating-analysis-results-for-transformations">
+<span id="update-notification"></span><h3><a class="toc-backref" href="#id16">Updating analysis results for transformations</a><a class="headerlink" href="#updating-analysis-results-for-transformations" title="Permalink to this headline">¶</a></h3>
+<p>Alias analysis information is initially computed for a static snapshot of the
+program, but clients will use this information to make transformations to the
+code. All but the most trivial forms of alias analysis will need to have their
+analysis results updated to reflect the changes made by these transformations.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface exposes four methods which are used to
+communicate program changes from the clients to the analysis implementations.
+Various alias analysis implementations should use these methods to ensure that
+their internal data structures are kept up-to-date as the program changes (for
+example, when an instruction is deleted), and clients of alias analysis must be
+sure to call these interfaces appropriately.</p>
+<div class="section" id="the-deletevalue-method">
+<h4><a class="toc-backref" href="#id17">The <tt class="docutils literal"><span class="pre">deleteValue</span></tt> method</a><a class="headerlink" href="#the-deletevalue-method" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">deleteValue</span></tt> method is called by transformations when they remove an
+instruction or any other value from the program (including values that do not
+use pointers). Typically alias analyses keep data structures that have entries
+for each value in the program. When this method is called, they should remove
+any entries for the specified value, if they exist.</p>
+</div>
+<div class="section" id="the-copyvalue-method">
+<h4><a class="toc-backref" href="#id18">The <tt class="docutils literal"><span class="pre">copyValue</span></tt> method</a><a class="headerlink" href="#the-copyvalue-method" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">copyValue</span></tt> method is used when a new value is introduced into the
+program. There is no way to introduce a value into the program that did not
+exist before (this doesn’t make sense for a safe compiler transformation), so
+this is the only way to introduce a new value. This method indicates that the
+new value has exactly the same properties as the value being copied.</p>
+</div>
+<div class="section" id="the-replacewithnewvalue-method">
+<h4><a class="toc-backref" href="#id19">The <tt class="docutils literal"><span class="pre">replaceWithNewValue</span></tt> method</a><a class="headerlink" href="#the-replacewithnewvalue-method" title="Permalink to this headline">¶</a></h4>
+<p>This method is a simple helper method that is provided to make clients easier to
+use. It is implemented by copying the old analysis information to the new
+value, then deleting the old value. This method cannot be overridden by alias
+analysis implementations.</p>
+</div>
+<div class="section" id="the-addescapinguse-method">
+<h4><a class="toc-backref" href="#id20">The <tt class="docutils literal"><span class="pre">addEscapingUse</span></tt> method</a><a class="headerlink" href="#the-addescapinguse-method" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">addEscapingUse</span></tt> method is used when the uses of a pointer value have
+changed in ways that may invalidate precomputed analysis information.
+Implementations may either use this callback to provide conservative responses
+for pointers whose uses have changed since analysis time, or may recompute some or
+all of their internal state to continue providing accurate responses.</p>
+<p>In general, any new use of a pointer value is considered an escaping use, and
+must be reported through this callback, <em>except</em> for the uses below:</p>
+<ul class="simple">
+<li>A <tt class="docutils literal"><span class="pre">bitcast</span></tt> or <tt class="docutils literal"><span class="pre">getelementptr</span></tt> of the pointer</li>
+<li>A <tt class="docutils literal"><span class="pre">store</span></tt> through the pointer (but not a <tt class="docutils literal"><span class="pre">store</span></tt> <em>of</em> the pointer)</li>
+<li>A <tt class="docutils literal"><span class="pre">load</span></tt> through the pointer</li>
+</ul>
+</div>
+</div>
+<div class="section" id="efficiency-issues">
+<h3><a class="toc-backref" href="#id21">Efficiency Issues</a><a class="headerlink" href="#efficiency-issues" title="Permalink to this headline">¶</a></h3>
+<p>From the LLVM perspective, the only thing you need to do to provide an efficient
+alias analysis is to make sure that alias analysis <strong>queries</strong> are serviced
+quickly. The actual calculation of the alias analysis results (the “run”
+method) is only performed once, but many (perhaps duplicate) queries may be
+performed. Because of this, try to move as much computation to the run method
+as possible (within reason).</p>
+</div>
+<div class="section" id="limitations">
+<h3><a class="toc-backref" href="#id22">Limitations</a><a class="headerlink" href="#limitations" title="Permalink to this headline">¶</a></h3>
+<p>The AliasAnalysis infrastructure has several limitations which make writing a
+new <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> implementation difficult.</p>
+<p>There is no way to override the default alias analysis. It would be very useful
+to be able to do something like “<tt class="docutils literal"><span class="pre">opt</span> <span class="pre">-my-aa</span> <span class="pre">-O2</span></tt>” and have it use <tt class="docutils literal"><span class="pre">-my-aa</span></tt>
+for all passes which need AliasAnalysis, but there is currently no support for
+that, short of changing the source code and recompiling. Similarly, there is
+also no way of setting a chain of analyses as the default.</p>
+<p>There is no way for transform passes to declare that they preserve
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> implementations. The <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface includes
+<tt class="docutils literal"><span class="pre">deleteValue</span></tt> and <tt class="docutils literal"><span class="pre">copyValue</span></tt> methods which are intended to allow a pass to
+keep an AliasAnalysis consistent, however there’s no way for a pass to declare
+in its <tt class="docutils literal"><span class="pre">getAnalysisUsage</span></tt> that it does so. Some passes attempt to use
+<tt class="docutils literal"><span class="pre">AU.addPreserved&lt;AliasAnalysis&gt;</span></tt>, however this doesn’t actually have any
+effect.</p>
+<p>Similarly, the <tt class="docutils literal"><span class="pre">opt</span> <span class="pre">-p</span></tt> option introduces <tt class="docutils literal"><span class="pre">ModulePass</span></tt> passes between each
+pass, which prevents the use of <tt class="docutils literal"><span class="pre">FunctionPass</span></tt> alias analysis passes.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> API does have functions for notifying implementations when
+values are deleted or copied, however these aren’t sufficient. There are many
+other ways that LLVM IR can be modified which could be relevant to
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> implementations which can not be expressed.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasAnalysisDebugger</span></tt> utility seems to suggest that <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt>
+implementations can expect that they will be informed of any relevant <tt class="docutils literal"><span class="pre">Value</span></tt>
+before it appears in an alias query. However, popular clients such as <tt class="docutils literal"><span class="pre">GVN</span></tt>
+don’t support this, and are known to trigger errors when run with the
+<tt class="docutils literal"><span class="pre">AliasAnalysisDebugger</span></tt>.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt> class (which is used by <tt class="docutils literal"><span class="pre">LICM</span></tt>) makes a
+non-deterministic number of alias queries. This can cause debugging techniques
+involving pausing execution after a predetermined number of queries to be
+unreliable.</p>
+<p>Many alias queries can be reformulated in terms of other alias queries. When
+multiple <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> queries are chained together, it would make sense to
+start those queries from the beginning of the chain, with care taken to avoid
+infinite looping, however currently an implementation which wants to do this can
+only start such queries from itself.</p>
+</div>
+</div>
+<div class="section" id="using-alias-analysis-results">
+<h2><a class="toc-backref" href="#id23">Using alias analysis results</a><a class="headerlink" href="#using-alias-analysis-results" title="Permalink to this headline">¶</a></h2>
+<p>There are several different ways to use alias analysis results. In order of
+preference, these are:</p>
+<div class="section" id="using-the-memorydependenceanalysis-pass">
+<h3><a class="toc-backref" href="#id24">Using the <tt class="docutils literal"><span class="pre">MemoryDependenceAnalysis</span></tt> Pass</a><a class="headerlink" href="#using-the-memorydependenceanalysis-pass" title="Permalink to this headline">¶</a></h3>
+<p>The <tt class="docutils literal"><span class="pre">memdep</span></tt> pass uses alias analysis to provide high-level dependence
+information about memory-using instructions. This will tell you which store
+feeds into a load, for example. It uses caching and other techniques to be
+efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations.</p>
+</div>
+<div class="section" id="using-the-aliassettracker-class">
+<span id="aliassettracker"></span><h3><a class="toc-backref" href="#id25">Using the <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt> class</a><a class="headerlink" href="#using-the-aliassettracker-class" title="Permalink to this headline">¶</a></h3>
+<p>Many transformations need information about alias <strong>sets</strong> that are active in
+some scope, rather than information about pairwise aliasing. The
+<a class="reference external" href="http://llvm.org/doxygen/classllvm_1_1AliasSetTracker.html">AliasSetTracker</a>
+class is used to efficiently build these Alias Sets from the pairwise alias
+analysis information provided by the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface.</p>
+<p>First you initialize the AliasSetTracker by using the “<tt class="docutils literal"><span class="pre">add</span></tt>” methods to add
+information about various potentially aliasing instructions in the scope you are
+interested in. Once all of the alias sets are completed, your pass should
+simply iterate through the constructed alias sets, using the <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt>
+<tt class="docutils literal"><span class="pre">begin()</span></tt>/<tt class="docutils literal"><span class="pre">end()</span></tt> methods.</p>
+<p>The <tt class="docutils literal"><span class="pre">AliasSet</span></tt>s formed by the <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt> are guaranteed to be
+disjoint, calculate mod/ref information and volatility for the set, and keep
+track of whether or not all of the pointers in the set are Must aliases. The
+AliasSetTracker also makes sure that sets are properly folded due to call
+instructions, and can provide a list of pointers in each set.</p>
+<p>As an example user of this, the <a class="reference external" href="doxygen/structLICM.html">Loop Invariant Code Motion</a> pass uses <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt>s to calculate alias
+sets for each loop nest. If an <tt class="docutils literal"><span class="pre">AliasSet</span></tt> in a loop is not modified, then all
+load instructions from that set may be hoisted out of the loop. If any alias
+sets are stored to <strong>and</strong> are must alias sets, then the stores may be sunk
+to outside of the loop, promoting the memory location to a register for the
+duration of the loop nest. Both of these transformations only apply if the
+pointer argument is loop-invariant.</p>
+<div class="section" id="the-aliassettracker-implementation">
+<h4><a class="toc-backref" href="#id26">The AliasSetTracker implementation</a><a class="headerlink" href="#the-aliassettracker-implementation" title="Permalink to this headline">¶</a></h4>
+<p>The AliasSetTracker class is implemented to be as efficient as possible. It
+uses the union-find algorithm to efficiently merge AliasSets when a pointer is
+inserted into the AliasSetTracker that aliases multiple sets. The primary data
+structure is a hash table mapping pointers to the AliasSet they are in.</p>
+<p>The AliasSetTracker class must maintain a list of all of the LLVM <tt class="docutils literal"><span class="pre">Value*</span></tt>s
+that are in each AliasSet. Since the hash table already has entries for each
+LLVM <tt class="docutils literal"><span class="pre">Value*</span></tt> of interest, the AliasSets thread the linked list through
+these hash-table nodes to avoid having to allocate memory unnecessarily, and to
+make merging alias sets extremely efficient (the linked list merge is constant
+time).</p>
+<p>You shouldn’t need to understand these details if you are just a client of the
+AliasSetTracker, but if you look at the code, hopefully this brief description
+will help make sense of why things are designed the way they are.</p>
+</div>
+</div>
+<div class="section" id="using-the-aliasanalysis-interface-directly">
+<h3><a class="toc-backref" href="#id27">Using the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface directly</a><a class="headerlink" href="#using-the-aliasanalysis-interface-directly" title="Permalink to this headline">¶</a></h3>
+<p>If neither of these utility classes is what your pass needs, you should use the
+interfaces exposed by the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> class directly. Try to use the
+higher-level methods when possible (e.g., use mod/ref information instead of the
+<a class="reference internal" href="#alias">alias</a> method directly if possible) to get the best precision and efficiency.</p>
+</div>
+</div>
+<div class="section" id="existing-alias-analysis-implementations-and-clients">
+<h2><a class="toc-backref" href="#id28">Existing alias analysis implementations and clients</a><a class="headerlink" href="#existing-alias-analysis-implementations-and-clients" title="Permalink to this headline">¶</a></h2>
+<p>If you’re going to be working with the LLVM alias analysis infrastructure, you
+should know what clients and implementations of alias analysis are available.
+In particular, if you are implementing an alias analysis, you should be aware of
+<a class="reference internal" href="#the-clients">the clients</a> that are useful for monitoring and evaluating different
+implementations.</p>
+<div class="section" id="available-aliasanalysis-implementations">
+<span id="various-alias-analysis-implementations"></span><h3><a class="toc-backref" href="#id29">Available <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> implementations</a><a class="headerlink" href="#available-aliasanalysis-implementations" title="Permalink to this headline">¶</a></h3>
+<p>This section lists the various implementations of the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt>
+interface. With the exception of the <a class="reference internal" href="#aliasanalysis-no-aa"><em>-no-aa</em></a>
+implementation, all of these <a class="reference internal" href="#aliasanalysis-chaining"><em>chain</em></a> to other
+alias analysis implementations.</p>
+<div class="section" id="the-no-aa-pass">
+<span id="aliasanalysis-no-aa"></span><h4><a class="toc-backref" href="#id30">The <tt class="docutils literal"><span class="pre">-no-aa</span></tt> pass</a><a class="headerlink" href="#the-no-aa-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-no-aa</span></tt> pass is just what it sounds like: an alias analysis that never
+returns any useful information. This pass can be useful if you think that alias
+analysis is doing something wrong and are trying to narrow down a problem.</p>
+</div>
+<div class="section" id="the-basicaa-pass">
+<h4><a class="toc-backref" href="#id31">The <tt class="docutils literal"><span class="pre">-basicaa</span></tt> pass</a><a class="headerlink" href="#the-basicaa-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-basicaa</span></tt> pass is an aggressive local analysis that <em>knows</em> many
+important facts:</p>
+<ul class="simple">
+<li>Distinct globals, stack allocations, and heap allocations can never alias.</li>
+<li>Globals, stack allocations, and heap allocations never alias the null pointer.</li>
+<li>Different fields of a structure do not alias.</li>
+<li>Indexes into arrays with statically differing subscripts cannot alias.</li>
+<li>Many common standard C library functions <a class="reference internal" href="#never-access-memory-or-only-read-memory">never access memory or only read
+memory</a>.</li>
+<li>Pointers that obviously point to constant globals “<tt class="docutils literal"><span class="pre">pointToConstantMemory</span></tt>”.</li>
+<li>Function calls cannot modify or reference stack allocations if they never
+escape from the function that allocates them (a common case for automatic
+arrays).</li>
+</ul>
+</div>
+<div class="section" id="the-globalsmodref-aa-pass">
+<h4><a class="toc-backref" href="#id32">The <tt class="docutils literal"><span class="pre">-globalsmodref-aa</span></tt> pass</a><a class="headerlink" href="#the-globalsmodref-aa-pass" title="Permalink to this headline">¶</a></h4>
+<p>This pass implements a simple context-sensitive mod/ref and alias analysis for
+internal global variables that don’t “have their address taken”. If a global
+does not have its address taken, the pass knows that no pointers alias the
+global. This pass also keeps track of functions that it knows never access
+memory or never read memory. This allows certain optimizations (e.g. GVN) to
+eliminate call instructions entirely.</p>
+<p>The real power of this pass is that it provides context-sensitive mod/ref
+information for call instructions. This allows the optimizer to know that calls
+to a function do not clobber or read the value of the global, allowing loads and
+stores to be eliminated.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">This pass is somewhat limited in its scope (only supports non-address-taken
+globals), but is a very quick analysis.</p>
+</div>
+</div>
+<div class="section" id="the-steens-aa-pass">
+<h4><a class="toc-backref" href="#id33">The <tt class="docutils literal"><span class="pre">-steens-aa</span></tt> pass</a><a class="headerlink" href="#the-steens-aa-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-steens-aa</span></tt> pass implements a variation on the well-known “Steensgaard’s
+algorithm” for interprocedural alias analysis. Steensgaard’s algorithm is a
+unification-based, flow-insensitive, context-insensitive, and field-insensitive
+alias analysis that is also very scalable (effectively linear time).</p>
+<p>The LLVM <tt class="docutils literal"><span class="pre">-steens-aa</span></tt> pass implements a “speculatively field-<strong>sensitive</strong>”
+version of Steensgaard’s algorithm using the Data Structure Analysis framework.
+This gives it substantially more precision than the standard algorithm while
+maintaining excellent analysis scalability.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><tt class="docutils literal"><span class="pre">-steens-aa</span></tt> is available in the optional “poolalloc” module. It is not part
+of the LLVM core.</p>
+</div>
+</div>
+<div class="section" id="the-ds-aa-pass">
+<h4><a class="toc-backref" href="#id34">The <tt class="docutils literal"><span class="pre">-ds-aa</span></tt> pass</a><a class="headerlink" href="#the-ds-aa-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-ds-aa</span></tt> pass implements the full Data Structure Analysis algorithm. Data
+Structure Analysis is a modular unification-based, flow-insensitive,
+context-<strong>sensitive</strong>, and speculatively field-<strong>sensitive</strong> alias
+analysis that is also quite scalable, usually at <tt class="docutils literal"><span class="pre">O(n</span> <span class="pre">*</span> <span class="pre">log(n))</span></tt>.</p>
+<p>This algorithm is capable of responding to a full variety of alias analysis
+queries, and can provide context-sensitive mod/ref information as well. The
+only major facility not implemented so far is support for must-alias
+information.</p>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last"><tt class="docutils literal"><span class="pre">-ds-aa</span></tt> is available in the optional “poolalloc” module. It is not part of
+the LLVM core.</p>
+</div>
+</div>
+<div class="section" id="the-scev-aa-pass">
+<h4><a class="toc-backref" href="#id35">The <tt class="docutils literal"><span class="pre">-scev-aa</span></tt> pass</a><a class="headerlink" href="#the-scev-aa-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-scev-aa</span></tt> pass implements AliasAnalysis queries by translating them into
+ScalarEvolution queries. This gives it a more complete understanding of
+<tt class="docutils literal"><span class="pre">getelementptr</span></tt> instructions and loop induction variables than other alias
+analyses have.</p>
+</div>
+</div>
+<div class="section" id="alias-analysis-driven-transformations">
+<h3><a class="toc-backref" href="#id36">Alias analysis driven transformations</a><a class="headerlink" href="#alias-analysis-driven-transformations" title="Permalink to this headline">¶</a></h3>
+<p>LLVM includes several alias-analysis driven transformations which can be used
+with any of the implementations above.</p>
+<div class="section" id="the-adce-pass">
+<h4><a class="toc-backref" href="#id37">The <tt class="docutils literal"><span class="pre">-adce</span></tt> pass</a><a class="headerlink" href="#the-adce-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-adce</span></tt> pass, which implements Aggressive Dead Code Elimination, uses the
+<tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface to delete calls to functions that do not have
+side-effects and are not used.</p>
+</div>
+<div class="section" id="the-licm-pass">
+<h4><a class="toc-backref" href="#id38">The <tt class="docutils literal"><span class="pre">-licm</span></tt> pass</a><a class="headerlink" href="#the-licm-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-licm</span></tt> pass implements various Loop Invariant Code Motion related
+transformations. It uses the <tt class="docutils literal"><span class="pre">AliasAnalysis</span></tt> interface for several different
+transformations:</p>
+<ul class="simple">
+<li>It uses mod/ref information to hoist or sink load instructions out of loops if
+there are no instructions in the loop that modify the memory loaded.</li>
+<li>It uses mod/ref information to hoist function calls out of loops that do not
+write to memory and are loop-invariant.</li>
+<li>It uses alias information to promote memory objects that are loaded and stored
+to in loops to live in a register instead. It can do this if there are no may
+aliases to the loaded/stored memory location.</li>
+</ul>
+</div>
+<div class="section" id="the-argpromotion-pass">
+<h4><a class="toc-backref" href="#id39">The <tt class="docutils literal"><span class="pre">-argpromotion</span></tt> pass</a><a class="headerlink" href="#the-argpromotion-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-argpromotion</span></tt> pass promotes by-reference arguments to be passed in
+by-value instead. In particular, if pointer arguments are only loaded from, it
+passes in the value loaded instead of the address to the function. This pass
+uses alias information to make sure that the value loaded from the argument
+pointer is not modified between the entry of the function and any load of the
+pointer.</p>
+</div>
+<div class="section" id="the-gvn-memcpyopt-and-dse-passes">
+<h4><a class="toc-backref" href="#id40">The <tt class="docutils literal"><span class="pre">-gvn</span></tt>, <tt class="docutils literal"><span class="pre">-memcpyopt</span></tt>, and <tt class="docutils literal"><span class="pre">-dse</span></tt> passes</a><a class="headerlink" href="#the-gvn-memcpyopt-and-dse-passes" title="Permalink to this headline">¶</a></h4>
+<p>These passes use AliasAnalysis information to reason about loads and stores.</p>
+</div>
+</div>
+<div class="section" id="clients-for-debugging-and-evaluation-of-implementations">
+<span id="the-clients"></span><h3><a class="toc-backref" href="#id41">Clients for debugging and evaluation of implementations</a><a class="headerlink" href="#clients-for-debugging-and-evaluation-of-implementations" title="Permalink to this headline">¶</a></h3>
+<p>These passes are useful for evaluating the various alias analysis
+implementations. You can use them with commands like:</p>
+<div class="highlight-bash"><div class="highlight"><pre>% opt -ds-aa -aa-eval foo.bc -disable-output -stats
+</pre></div>
+</div>
+<div class="section" id="the-print-alias-sets-pass">
+<h4><a class="toc-backref" href="#id42">The <tt class="docutils literal"><span class="pre">-print-alias-sets</span></tt> pass</a><a class="headerlink" href="#the-print-alias-sets-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-print-alias-sets</span></tt> pass is exposed as part of the <tt class="docutils literal"><span class="pre">opt</span></tt> tool to print
+out the Alias Sets formed by the <a class="reference internal" href="#aliassettracker">AliasSetTracker</a> class. This is useful if
+you’re using the <tt class="docutils literal"><span class="pre">AliasSetTracker</span></tt> class. To use it, use something like:</p>
+<div class="highlight-bash"><div class="highlight"><pre>% opt -ds-aa -print-alias-sets -disable-output
+</pre></div>
+</div>
+</div>
+<div class="section" id="the-aa-eval-pass">
+<h4><a class="toc-backref" href="#id43">The <tt class="docutils literal"><span class="pre">-aa-eval</span></tt> pass</a><a class="headerlink" href="#the-aa-eval-pass" title="Permalink to this headline">¶</a></h4>
+<p>The <tt class="docutils literal"><span class="pre">-aa-eval</span></tt> pass simply iterates through all pairs of pointers in a
+function and asks an alias analysis whether or not the pointers alias. This
+gives an indication of the precision of the alias analysis. Statistics are
+printed indicating the percent of no/may/must aliases found (a more precise
+algorithm will have a lower number of may aliases).</p>
+</div>
+</div>
+</div>
+<div class="section" id="memory-dependence-analysis">
+<h2><a class="toc-backref" href="#id44">Memory Dependence Analysis</a><a class="headerlink" href="#memory-dependence-analysis" title="Permalink to this headline">¶</a></h2>
+<div class="admonition note">
+<p class="first admonition-title">Note</p>
+<p class="last">We are currently in the process of migrating things from
+<tt class="docutils literal"><span class="pre">MemoryDependenceAnalysis</span></tt> to <a class="reference internal" href="MemorySSA.html"><em>MemorySSA</em></a>. Please try to use
+that instead.</p>
+</div>
+<p>If you’re just looking to be a client of alias analysis information, consider
+using the Memory Dependence Analysis interface instead. MemDep is a lazy,
+caching layer on top of alias analysis that is able to answer the question of
+what preceding memory operations a given instruction depends on, either at an
+intra- or inter-block level. Because of its laziness and caching policy, using
+MemDep can be a significant performance win over accessing alias analysis
+directly.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="MemorySSA.html" title="MemorySSA"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="OptBisect.html" title="Using -opt-bisect-limit to debug optimization errors"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/Atomics.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/Atomics.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/Atomics.html (added)
+++ www-releases/trunk/7.0.1/docs/Atomics.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,632 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>LLVM Atomic Instructions and Concurrency Guide — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="LLVM Coding Standards" href="CodingStandards.html" />
+ <link rel="prev" title="A guide to Dockerfiles for building LLVM" href="Docker.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="CodingStandards.html" title="LLVM Coding Standards"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="Docker.html" title="A guide to Dockerfiles for building LLVM"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="llvm-atomic-instructions-and-concurrency-guide">
+<h1>LLVM Atomic Instructions and Concurrency Guide<a class="headerlink" href="#llvm-atomic-instructions-and-concurrency-guide" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#introduction" id="id4">Introduction</a></li>
+<li><a class="reference internal" href="#optimization-outside-atomic" id="id5">Optimization outside atomic</a></li>
+<li><a class="reference internal" href="#atomic-instructions" id="id6">Atomic instructions</a></li>
+<li><a class="reference internal" href="#atomic-orderings" id="id7">Atomic orderings</a><ul>
+<li><a class="reference internal" href="#notatomic" id="id8">NotAtomic</a></li>
+<li><a class="reference internal" href="#unordered" id="id9">Unordered</a></li>
+<li><a class="reference internal" href="#monotonic" id="id10">Monotonic</a></li>
+<li><a class="reference internal" href="#acquire" id="id11">Acquire</a></li>
+<li><a class="reference internal" href="#release" id="id12">Release</a></li>
+<li><a class="reference internal" href="#acquirerelease" id="id13">AcquireRelease</a></li>
+<li><a class="reference internal" href="#sequentiallyconsistent" id="id14">SequentiallyConsistent</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#atomics-and-ir-optimization" id="id15">Atomics and IR optimization</a></li>
+<li><a class="reference internal" href="#atomics-and-codegen" id="id16">Atomics and Codegen</a></li>
+<li><a class="reference internal" href="#libcalls-atomic" id="id17">Libcalls: __atomic_*</a></li>
+<li><a class="reference internal" href="#libcalls-sync" id="id18">Libcalls: __sync_*</a></li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2><a class="toc-backref" href="#id4">Introduction</a><a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p>LLVM supports instructions which are well-defined in the presence of threads and
+asynchronous signals.</p>
+<p>The atomic instructions are designed specifically to provide readable IR and
+optimized code generation for the following:</p>
+<ul class="simple">
+<li>The C++11 <tt class="docutils literal"><span class="pre">&lt;atomic&gt;</span></tt> header. (<a class="reference external" href="http://www.open-std.org/jtc1/sc22/wg21/">C++11 draft available here</a>.) (<a class="reference external" href="http://www.open-std.org/jtc1/sc22/wg14/">C11 draft available here</a>.)</li>
+<li>Proper semantics for Java-style memory, for both <tt class="docutils literal"><span class="pre">volatile</span></tt> and regular
+shared variables. (<a class="reference external" href="http://docs.oracle.com/javase/specs/jls/se8/html/jls-17.html">Java Specification</a>)</li>
+<li>gcc-compatible <tt class="docutils literal"><span class="pre">__sync_*</span></tt> builtins. (<a class="reference external" href="https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html">Description</a>)</li>
+<li>Other scenarios with atomic semantics, including <tt class="docutils literal"><span class="pre">static</span></tt> variables with
+non-trivial constructors in C++.</li>
+</ul>
+<p>Atomic and volatile in the IR are orthogonal; “volatile” is the C/C++ volatile,
+which ensures that every volatile load and store happens and is performed in the
+stated order. A couple of examples: if a SequentiallyConsistent store is
+immediately followed by another SequentiallyConsistent store to the same
+address, the first store can be erased. This transformation is not allowed for a
+pair of volatile stores. On the other hand, a non-volatile non-atomic load can
+be moved across a volatile load freely, but not an Acquire load.</p>
+<p>This document is intended to provide anyone either writing a frontend
+for LLVM or working on optimization passes for LLVM with a guide for how to deal
+with instructions with special semantics in the presence of concurrency. This
+is not intended to be a precise guide to the semantics; the details can get
+extremely complicated and unreadable, and are not usually necessary.</p>
+</div>
+<div class="section" id="optimization-outside-atomic">
+<span id="id1"></span><h2><a class="toc-backref" href="#id5">Optimization outside atomic</a><a class="headerlink" href="#optimization-outside-atomic" title="Permalink to this headline">¶</a></h2>
+<p>The basic <tt class="docutils literal"><span class="pre">'load'</span></tt> and <tt class="docutils literal"><span class="pre">'store'</span></tt> allow a variety of optimizations, but can
+lead to undefined results in a concurrent environment; see <a class="reference internal" href="#notatomic">NotAtomic</a>. This
+section specifically goes into the one optimizer restriction which applies in
+concurrent environments, which gets a bit more of an extended description
+because any optimization dealing with stores needs to be aware of it.</p>
+<p>From the optimizer’s point of view, the rule is that if there are not any
+instructions with atomic ordering involved, concurrency does not matter, with
+one exception: if a variable might be visible to another thread or signal
+handler, a store cannot be inserted along a path where it might not execute
+otherwise. Take the following example:</p>
+<div class="highlight-c"><div class="highlight"><pre><span class="cm">/* C code, for readability; run through clang -O2 -S -emit-llvm to get</span>
+<span class="cm"> equivalent IR */</span>
+ <span class="kt">int</span> <span class="n">x</span><span class="p">;</span>
+ <span class="kt">void</span> <span class="nf">f</span><span class="p">(</span><span class="kt">int</span><span class="o">*</span> <span class="n">a</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">for</span> <span class="p">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="mi">100</span><span class="p">;</span> <span class="n">i</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">a</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
+ <span class="n">x</span> <span class="o">+=</span> <span class="mi">1</span><span class="p">;</span>
+ <span class="p">}</span>
+ <span class="p">}</span>
+</pre></div>
+</div>
+<p>The following is equivalent in non-concurrent situations:</p>
+<div class="highlight-c"><div class="highlight"><pre><span class="kt">int</span> <span class="n">x</span><span class="p">;</span>
+<span class="kt">void</span> <span class="nf">f</span><span class="p">(</span><span class="kt">int</span><span class="o">*</span> <span class="n">a</span><span class="p">)</span> <span class="p">{</span>
+ <span class="kt">int</span> <span class="n">xtemp</span> <span class="o">=</span> <span class="n">x</span><span class="p">;</span>
+ <span class="k">for</span> <span class="p">(</span><span class="kt">int</span> <span class="n">i</span> <span class="o">=</span> <span class="mi">0</span><span class="p">;</span> <span class="n">i</span> <span class="o">&lt;</span> <span class="mi">100</span><span class="p">;</span> <span class="n">i</span><span class="o">++</span><span class="p">)</span> <span class="p">{</span>
+ <span class="k">if</span> <span class="p">(</span><span class="n">a</span><span class="p">[</span><span class="n">i</span><span class="p">])</span>
+ <span class="n">xtemp</span> <span class="o">+=</span> <span class="mi">1</span><span class="p">;</span>
+ <span class="p">}</span>
+ <span class="n">x</span> <span class="o">=</span> <span class="n">xtemp</span><span class="p">;</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>However, LLVM is not allowed to transform the former to the latter: it could
+indirectly introduce undefined behavior if another thread can access <tt class="docutils literal"><span class="pre">x</span></tt> at
+the same time. (This example is particularly of interest because before the
+concurrency model was implemented, LLVM would perform this transformation.)</p>
+<p>Note that speculative loads are allowed; a load which is part of a race returns
+<tt class="docutils literal"><span class="pre">undef</span></tt>, but does not have undefined behavior.</p>
+</div>
+<div class="section" id="atomic-instructions">
+<h2><a class="toc-backref" href="#id6">Atomic instructions</a><a class="headerlink" href="#atomic-instructions" title="Permalink to this headline">¶</a></h2>
+<p>For cases where simple loads and stores are not sufficient, LLVM provides
+various atomic instructions. The exact guarantees provided depend on the
+ordering; see <a class="reference internal" href="#atomic-orderings">Atomic orderings</a>.</p>
+<p><tt class="docutils literal"><span class="pre">load</span> <span class="pre">atomic</span></tt> and <tt class="docutils literal"><span class="pre">store</span> <span class="pre">atomic</span></tt> provide the same basic functionality as
+non-atomic loads and stores, but provide additional guarantees in situations
+where threads and signals are involved.</p>
+<p><tt class="docutils literal"><span class="pre">cmpxchg</span></tt> and <tt class="docutils literal"><span class="pre">atomicrmw</span></tt> are essentially like an atomic load followed by an
+atomic store (where the store is conditional for <tt class="docutils literal"><span class="pre">cmpxchg</span></tt>), but no other
+memory operation can happen on any thread between the load and store.</p>
+<p>A <tt class="docutils literal"><span class="pre">fence</span></tt> provides Acquire and/or Release ordering which is not part of
+another operation; it is normally used along with Monotonic memory operations.
+A Monotonic load followed by an Acquire fence is roughly equivalent to an
+Acquire load, and a Monotonic store following a Release fence is roughly
+equivalent to a Release store. SequentiallyConsistent fences behave as both
+an Acquire and a Release fence, and offer some additional complicated
+guarantees; see the C++11 standard for details.</p>
+<p>Frontends generating atomic instructions generally need to be aware of the
+target to some degree; atomic instructions are guaranteed to be lock-free, and
+therefore an instruction which is wider than the target natively supports can be
+impossible to generate.</p>
+</div>
+<div class="section" id="atomic-orderings">
+<span id="id2"></span><h2><a class="toc-backref" href="#id7">Atomic orderings</a><a class="headerlink" href="#atomic-orderings" title="Permalink to this headline">¶</a></h2>
+<p>In order to achieve a balance between performance and necessary guarantees,
+there are six levels of atomicity. They are listed in order of strength; each
+level includes all the guarantees of the previous level except for
+Acquire/Release. (See also <a class="reference external" href="LangRef.html#ordering">LangRef Ordering</a>.)</p>
+<div class="section" id="notatomic">
+<span id="id3"></span><h3><a class="toc-backref" href="#id8">NotAtomic</a><a class="headerlink" href="#notatomic" title="Permalink to this headline">¶</a></h3>
+<p>NotAtomic is the obvious case: a load or store which is not atomic. (This isn’t
+really a level of atomicity, but is listed here for comparison.) This is
+essentially a regular load or store. If there is a race on a given memory
+location, loads from that location return undef.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This is intended to match shared variables in C/C++, and to be used in any
+other context where memory access is necessary, and a race is impossible. (The
+precise definition is in <a class="reference external" href="LangRef.html#memmodel">LangRef Memory Model</a>.)</dd>
+<dt>Notes for frontends</dt>
+<dd>The rule is essentially that all memory accessed with basic loads and stores
+by multiple threads should be protected by a lock or other synchronization;
+otherwise, you are likely to run into undefined behavior. If your frontend is
+for a “safe” language like Java, use Unordered to load and store any shared
+variable. Note that NotAtomic volatile loads and stores are not properly
+atomic; do not try to use them as a substitute. (Per the C/C++ standards,
+volatile does provide some limited guarantees around asynchronous signals, but
+atomics are generally a better solution.)</dd>
+<dt>Notes for optimizers</dt>
+<dd>Introducing loads to shared variables along a codepath where they would not
+otherwise exist is allowed; introducing stores to shared variables is not. See
+<a class="reference internal" href="#optimization-outside-atomic">Optimization outside atomic</a>.</dd>
+<dt>Notes for code generation</dt>
+<dd>The one interesting restriction here is that it is not allowed to write to
+bytes outside of the bytes relevant to a store. This is mostly relevant to
+unaligned stores: it is not allowed in general to convert an unaligned store
+into two aligned stores of the same width as the unaligned store. Backends are
+also expected to generate an i8 store as an i8 store, and not an instruction
+which writes to surrounding bytes. (If you are writing a backend for an
+architecture which cannot satisfy these restrictions and cares about
+concurrency, please send an email to llvm-dev.)</dd>
+</dl>
+</div>
+<div class="section" id="unordered">
+<h3><a class="toc-backref" href="#id9">Unordered</a><a class="headerlink" href="#unordered" title="Permalink to this headline">¶</a></h3>
+<p>Unordered is the lowest level of atomicity. It essentially guarantees that races
+produce somewhat sane results instead of having undefined behavior. It also
+guarantees the operation to be lock-free, so it does not depend on the data
+being part of a special atomic structure or depend on a separate per-process
+global lock. Note that code generation will fail for unsupported atomic
+operations; if you need such an operation, use explicit locking.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This is intended to match the Java memory model for shared variables.</dd>
+<dt>Notes for frontends</dt>
+<dd>This cannot be used for synchronization, but is useful for Java and other
+“safe” languages which need to guarantee that the generated code never
+exhibits undefined behavior. Note that this guarantee is cheap on common
+platforms for loads of a native width, but can be expensive or unavailable for
+wider loads, like a 64-bit store on ARM. (A frontend for Java or other “safe”
+languages would normally split a 64-bit store on ARM into two 32-bit unordered
+stores.)</dd>
+<dt>Notes for optimizers</dt>
+<dd>In terms of the optimizer, this prohibits any transformation that transforms a
+single load into multiple loads, transforms a store into multiple stores,
+narrows a store, or stores a value which would not be stored otherwise. Some
+examples of unsafe optimizations are narrowing an assignment into a bitfield,
+rematerializing a load, and turning loads and stores into a memcpy
+call. Reordering unordered operations is safe, though, and optimizers should
+take advantage of that because unordered operations are common in languages
+that need them.</dd>
+<dt>Notes for code generation</dt>
+<dd>These operations are required to be atomic in the sense that if you use
+unordered loads and unordered stores, a load cannot see a value which was
+never stored. A normal load or store instruction is usually sufficient, but
+note that an unordered load or store cannot be split into multiple
+instructions (or an instruction which does multiple memory operations, like
+<tt class="docutils literal"><span class="pre">LDRD</span></tt> on ARM without LPAE, or not naturally-aligned <tt class="docutils literal"><span class="pre">LDRD</span></tt> on LPAE ARM).</dd>
+</dl>
+</div>
+<div class="section" id="monotonic">
+<h3><a class="toc-backref" href="#id10">Monotonic</a><a class="headerlink" href="#monotonic" title="Permalink to this headline">¶</a></h3>
+<p>Monotonic is the weakest level of atomicity that can be used in synchronization
+primitives, although it does not provide any general synchronization. It
+essentially guarantees that if you take all the operations affecting a specific
+address, a consistent ordering exists.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This corresponds to the C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_relaxed</span></tt>; see those
+standards for the exact definition.</dd>
+<dt>Notes for frontends</dt>
+<dd>If you are writing a frontend which uses this directly, use with caution. The
+guarantees in terms of synchronization are very weak, so make sure these are
+only used in a pattern which you know is correct. Generally, these would
+either be used for atomic operations which do not protect other memory (like
+an atomic counter), or along with a <tt class="docutils literal"><span class="pre">fence</span></tt>.</dd>
+<dt>Notes for optimizers</dt>
+<dd>In terms of the optimizer, this can be treated as a read+write on the relevant
+memory location (and alias analysis will take advantage of that). In addition,
+it is legal to reorder non-atomic and Unordered loads around Monotonic
+loads. CSE/DSE and a few other optimizations are allowed, but Monotonic
+operations are unlikely to be used in ways which would make those
+optimizations useful.</dd>
+<dt>Notes for code generation</dt>
+<dd>Code generation is essentially the same as that for unordered for loads and
+stores. No fences are required. <tt class="docutils literal"><span class="pre">cmpxchg</span></tt> and <tt class="docutils literal"><span class="pre">atomicrmw</span></tt> are required
+to appear as a single operation.</dd>
+</dl>
+</div>
+<div class="section" id="acquire">
+<h3><a class="toc-backref" href="#id11">Acquire</a><a class="headerlink" href="#acquire" title="Permalink to this headline">¶</a></h3>
+<p>Acquire provides a barrier of the sort necessary to acquire a lock to access
+other memory with normal loads and stores.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This corresponds to the C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_acquire</span></tt>. It should also be
+used for C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_consume</span></tt>.</dd>
+<dt>Notes for frontends</dt>
+<dd>If you are writing a frontend which uses this directly, use with caution.
+Acquire only provides a semantic guarantee when paired with a Release
+operation.</dd>
+<dt>Notes for optimizers</dt>
+<dd>Optimizers not aware of atomics can treat this like a nothrow call. It is
+also possible to move stores from before an Acquire load or read-modify-write
+operation to after it, and move non-Acquire loads from before an Acquire
+operation to after it.</dd>
+<dt>Notes for code generation</dt>
+<dd>Architectures with weak memory ordering (essentially everything relevant today
+except x86 and SPARC) require some sort of fence to maintain the Acquire
+semantics. The precise fences required vary widely by architecture, but for
+a simple implementation, most architectures provide a barrier which is strong
+enough for everything (<tt class="docutils literal"><span class="pre">dmb</span></tt> on ARM, <tt class="docutils literal"><span class="pre">sync</span></tt> on PowerPC, etc.). Putting
+such a fence after the equivalent Monotonic operation is sufficient to
+maintain Acquire semantics for a memory operation.</dd>
+</dl>
+</div>
+<div class="section" id="release">
+<h3><a class="toc-backref" href="#id12">Release</a><a class="headerlink" href="#release" title="Permalink to this headline">¶</a></h3>
+<p>Release is similar to Acquire, but with a barrier of the sort necessary to
+release a lock.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This corresponds to the C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_release</span></tt>.</dd>
+<dt>Notes for frontends</dt>
+<dd>If you are writing a frontend which uses this directly, use with caution.
+Release only provides a semantic guarantee when paired with an Acquire
+operation.</dd>
+<dt>Notes for optimizers</dt>
+<dd>Optimizers not aware of atomics can treat this like a nothrow call. It is
+also possible to move loads from after a Release store or read-modify-write
+operation to before it, and move non-Release stores from after a Release
+operation to before it.</dd>
+<dt>Notes for code generation</dt>
+<dd>See the section on Acquire; a fence before the relevant operation is usually
+sufficient for Release. Note that a store-store fence is not sufficient to
+implement Release semantics; store-store fences are generally not exposed to
+IR because they are extremely difficult to use correctly.</dd>
+</dl>
+</div>
+<div class="section" id="acquirerelease">
+<h3><a class="toc-backref" href="#id13">AcquireRelease</a><a class="headerlink" href="#acquirerelease" title="Permalink to this headline">¶</a></h3>
+<p>AcquireRelease (<tt class="docutils literal"><span class="pre">acq_rel</span></tt> in IR) provides both an Acquire and a Release
+barrier (for fences and operations which both read and write memory).</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This corresponds to the C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_acq_rel</span></tt>.</dd>
+<dt>Notes for frontends</dt>
+<dd>If you are writing a frontend which uses this directly, use with caution.
+Acquire only provides a semantic guarantee when paired with a Release
+operation, and vice versa.</dd>
+<dt>Notes for optimizers</dt>
+<dd>In general, optimizers should treat this like a nothrow call; the possible
+optimizations are usually not interesting.</dd>
+<dt>Notes for code generation</dt>
+<dd>This operation has Acquire and Release semantics; see the sections on Acquire
+and Release.</dd>
+</dl>
+</div>
+<div class="section" id="sequentiallyconsistent">
+<h3><a class="toc-backref" href="#id14">SequentiallyConsistent</a><a class="headerlink" href="#sequentiallyconsistent" title="Permalink to this headline">¶</a></h3>
+<p>SequentiallyConsistent (<tt class="docutils literal"><span class="pre">seq_cst</span></tt> in IR) provides Acquire semantics for loads
+and Release semantics for stores. Additionally, it guarantees that a total
+ordering exists between all SequentiallyConsistent operations.</p>
+<dl class="docutils">
+<dt>Relevant standard</dt>
+<dd>This corresponds to the C++11/C11 <tt class="docutils literal"><span class="pre">memory_order_seq_cst</span></tt>, Java volatile, and
+the gcc-compatible <tt class="docutils literal"><span class="pre">__sync_*</span></tt> builtins which do not specify otherwise.</dd>
+<dt>Notes for frontends</dt>
+<dd>If a frontend is exposing atomic operations, these are much easier to reason
+about for the programmer than other kinds of operations, and using them is
+generally a practical performance tradeoff.</dd>
+<dt>Notes for optimizers</dt>
+<dd>Optimizers not aware of atomics can treat this like a nothrow call. For
+SequentiallyConsistent loads and stores, the same reorderings are allowed as
+for Acquire loads and Release stores, except that SequentiallyConsistent
+operations may not be reordered.</dd>
+<dt>Notes for code generation</dt>
+<dd>SequentiallyConsistent loads minimally require the same barriers as Acquire
+operations and SequentiallyConsistent stores require Release
+barriers. Additionally, the code generator must enforce ordering between
+SequentiallyConsistent stores followed by SequentiallyConsistent loads. This
+is usually done by emitting either a full fence before the loads or a full
+fence after the stores; which is preferred varies by architecture.</dd>
+</dl>
+</div>
+</div>
+<div class="section" id="atomics-and-ir-optimization">
+<h2><a class="toc-backref" href="#id15">Atomics and IR optimization</a><a class="headerlink" href="#atomics-and-ir-optimization" title="Permalink to this headline">¶</a></h2>
+<p>Predicates for optimizer writers to query:</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">isSimple()</span></tt>: A load or store which is not volatile or atomic. This is
+what, for example, memcpyopt would check for operations it might transform.</li>
+<li><tt class="docutils literal"><span class="pre">isUnordered()</span></tt>: A load or store which is not volatile and at most
+Unordered. This would be checked, for example, by LICM before hoisting an
+operation.</li>
+<li><tt class="docutils literal"><span class="pre">mayReadFromMemory()</span></tt>/<tt class="docutils literal"><span class="pre">mayWriteToMemory()</span></tt>: Existing predicate, but note
+that they return true for any operation which is volatile or at least
+Monotonic.</li>
+<li><tt class="docutils literal"><span class="pre">isStrongerThan</span></tt> / <tt class="docutils literal"><span class="pre">isAtLeastOrStrongerThan</span></tt>: These are predicates on
+orderings. They can be useful for passes that are aware of atomics, for
+example to do DSE across a single atomic access, but not across a
+release-acquire pair (see MemoryDependencyAnalysis for an example of this).</li>
+<li>Alias analysis: Note that AA will return ModRef for anything Acquire or
+Release, and for the address accessed by any Monotonic operation.</li>
+</ul>
+<p>To support optimizing around atomic operations, make sure you are using the
+right predicates; everything should work if that is done. If your pass should
+optimize some atomic operations (Unordered operations in particular), make sure
+it doesn’t replace an atomic load or store with a non-atomic operation.</p>
+<p>Some examples of how optimizations interact with various kinds of atomic
+operations:</p>
+<ul class="simple">
+<li><tt class="docutils literal"><span class="pre">memcpyopt</span></tt>: An atomic operation cannot be optimized into part of a
+memcpy/memset, including unordered loads/stores. It can pull operations
+across some atomic operations.</li>
+<li>LICM: Unordered loads/stores can be moved out of a loop. It just treats
+monotonic operations like a read+write to a memory location, and anything
+stricter than that like a nothrow call.</li>
+<li>DSE: Unordered stores can be DSE’ed like normal stores. Monotonic stores can
+be DSE’ed in some cases, but it’s tricky to reason about, and not especially
+important. It is possible in some cases for DSE to operate across a stronger
+atomic operation, but it is fairly tricky. DSE delegates this reasoning to
+MemoryDependencyAnalysis (which is also used by other passes like GVN).</li>
+<li>Folding a load: Any atomic load from a constant global can be constant-folded,
+because it cannot be observed. Similar reasoning allows sroa with
+atomic loads and stores.</li>
+</ul>
+</div>
+<div class="section" id="atomics-and-codegen">
+<h2><a class="toc-backref" href="#id16">Atomics and Codegen</a><a class="headerlink" href="#atomics-and-codegen" title="Permalink to this headline">¶</a></h2>
+<p>Atomic operations are represented in the SelectionDAG with <tt class="docutils literal"><span class="pre">ATOMIC_*</span></tt> opcodes.
+On architectures which use barrier instructions for all atomic ordering (like
+ARM), appropriate fences can be emitted by the AtomicExpand Codegen pass if
+<tt class="docutils literal"><span class="pre">setInsertFencesForAtomic()</span></tt> was used.</p>
+<p>The MachineMemOperand for all atomic operations is currently marked as volatile;
+this is not correct in the IR sense of volatile, but CodeGen handles anything
+marked volatile very conservatively. This should get fixed at some point.</p>
+<p>One very important property of the atomic operations is that if your backend
+supports any inline lock-free atomic operations of a given size, you should
+support <em>ALL</em> operations of that size in a lock-free manner.</p>
+<p>When the target implements atomic <tt class="docutils literal"><span class="pre">cmpxchg</span></tt> or LL/SC instructions (as most do)
+this is trivial: all the other operations can be implemented on top of those
+primitives. However, on many older CPUs (e.g. ARMv5, SparcV8, Intel 80386) there
+are atomic load and store instructions, but no <tt class="docutils literal"><span class="pre">cmpxchg</span></tt> or LL/SC. As it is
+invalid to implement <tt class="docutils literal"><span class="pre">atomic</span> <span class="pre">load</span></tt> using the native instruction, but
+<tt class="docutils literal"><span class="pre">cmpxchg</span></tt> using a library call to a function that uses a mutex, <tt class="docutils literal"><span class="pre">atomic</span>
+<span class="pre">load</span></tt> must <em>also</em> expand to a library call on such architectures, so that it
+can remain atomic with regards to a simultaneous <tt class="docutils literal"><span class="pre">cmpxchg</span></tt>, by using the same
+mutex.</p>
+<p>AtomicExpandPass can help with that: it will expand all atomic operations to the
+proper <tt class="docutils literal"><span class="pre">__atomic_*</span></tt> libcalls for any size above the maximum set by
+<tt class="docutils literal"><span class="pre">setMaxAtomicSizeInBitsSupported</span></tt> (which defaults to 0).</p>
+<p>On x86, all atomic loads generate a <tt class="docutils literal"><span class="pre">MOV</span></tt>. SequentiallyConsistent stores
+generate an <tt class="docutils literal"><span class="pre">XCHG</span></tt>, other stores generate a <tt class="docutils literal"><span class="pre">MOV</span></tt>. SequentiallyConsistent
+fences generate an <tt class="docutils literal"><span class="pre">MFENCE</span></tt>, other fences do not cause any code to be
+generated. <tt class="docutils literal"><span class="pre">cmpxchg</span></tt> uses the <tt class="docutils literal"><span class="pre">LOCK</span> <span class="pre">CMPXCHG</span></tt> instruction. <tt class="docutils literal"><span class="pre">atomicrmw</span> <span class="pre">xchg</span></tt>
+uses <tt class="docutils literal"><span class="pre">XCHG</span></tt>, <tt class="docutils literal"><span class="pre">atomicrmw</span> <span class="pre">add</span></tt> and <tt class="docutils literal"><span class="pre">atomicrmw</span> <span class="pre">sub</span></tt> use <tt class="docutils literal"><span class="pre">XADD</span></tt>, and all
+other <tt class="docutils literal"><span class="pre">atomicrmw</span></tt> operations generate a loop with <tt class="docutils literal"><span class="pre">LOCK</span> <span class="pre">CMPXCHG</span></tt>. Depending
+on the users of the result, some <tt class="docutils literal"><span class="pre">atomicrmw</span></tt> operations can be translated into
+operations like <tt class="docutils literal"><span class="pre">LOCK</span> <span class="pre">AND</span></tt>, but that does not work in general.</p>
+<p>On ARM (before v8), MIPS, and many other RISC architectures, Acquire, Release,
+and SequentiallyConsistent semantics require barrier instructions for every such
+operation. Loads and stores generate normal instructions. <tt class="docutils literal"><span class="pre">cmpxchg</span></tt> and
+<tt class="docutils literal"><span class="pre">atomicrmw</span></tt> can be represented using a loop with LL/SC-style instructions
+which take some sort of exclusive lock on a cache line (<tt class="docutils literal"><span class="pre">LDREX</span></tt> and <tt class="docutils literal"><span class="pre">STREX</span></tt>
+on ARM, etc.).</p>
+<p>It is often easiest for backends to use AtomicExpandPass to lower some of the
+atomic constructs. Here are some lowerings it can do:</p>
+<ul class="simple">
+<li>cmpxchg -> loop with load-linked/store-conditional
+by overriding <tt class="docutils literal"><span class="pre">shouldExpandAtomicCmpXchgInIR()</span></tt>, <tt class="docutils literal"><span class="pre">emitLoadLinked()</span></tt>,
+<tt class="docutils literal"><span class="pre">emitStoreConditional()</span></tt></li>
+<li>large loads/stores -> ll-sc/cmpxchg
+by overriding <tt class="docutils literal"><span class="pre">shouldExpandAtomicStoreInIR()</span></tt>/<tt class="docutils literal"><span class="pre">shouldExpandAtomicLoadInIR()</span></tt></li>
+<li>strong atomic accesses -> monotonic accesses + fences by overriding
+<tt class="docutils literal"><span class="pre">shouldInsertFencesForAtomic()</span></tt>, <tt class="docutils literal"><span class="pre">emitLeadingFence()</span></tt>, and
+<tt class="docutils literal"><span class="pre">emitTrailingFence()</span></tt></li>
+<li>atomic rmw -> loop with cmpxchg or load-linked/store-conditional
+by overriding <tt class="docutils literal"><span class="pre">expandAtomicRMWInIR()</span></tt></li>
+<li>expansion to __atomic_* libcalls for unsupported sizes.</li>
+</ul>
+<p>For an example of all of these, look at the ARM backend.</p>
+</div>
+<div class="section" id="libcalls-atomic">
+<h2><a class="toc-backref" href="#id17">Libcalls: __atomic_*</a><a class="headerlink" href="#libcalls-atomic" title="Permalink to this headline">¶</a></h2>
+<p>There are two kinds of atomic library calls that are generated by LLVM. Please
+note that both sets of library functions somewhat confusingly share the names of
+builtin functions defined by clang. Despite this, the library functions are
+not directly related to the builtins: it is <em>not</em> the case that <tt class="docutils literal"><span class="pre">__atomic_*</span></tt>
+builtins lower to <tt class="docutils literal"><span class="pre">__atomic_*</span></tt> library calls and <tt class="docutils literal"><span class="pre">__sync_*</span></tt> builtins lower
+to <tt class="docutils literal"><span class="pre">__sync_*</span></tt> library calls.</p>
+<p>The first set of library functions are named <tt class="docutils literal"><span class="pre">__atomic_*</span></tt>. This set has been
+“standardized” by GCC, and is described below. (See also <a class="reference external" href="https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary">GCC’s documentation</a>)</p>
+<p>LLVM’s AtomicExpandPass will translate atomic operations on data sizes above
+<tt class="docutils literal"><span class="pre">MaxAtomicSizeInBitsSupported</span></tt> into calls to these functions.</p>
+<p>There are four generic functions, which can be called with data of any size or
+alignment:</p>
+<div class="highlight-python"><pre>void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
+void __atomic_store(size_t size, void *ptr, void *val, int ordering)
+void __atomic_exchange(size_t size, void *ptr, void *val, void *ret, int ordering)
+bool __atomic_compare_exchange(size_t size, void *ptr, void *expected, void *desired, int success_order, int failure_order)</pre>
+</div>
+<p>There are also size-specialized versions of the above functions, which can only
+be used with <em>naturally-aligned</em> pointers of the appropriate size. In the
+signatures below, “N” is one of 1, 2, 4, 8, and 16, and “iN” is the appropriate
+integer type of that size; if no such integer type exists, the specialization
+cannot be used:</p>
+<div class="highlight-python"><pre>iN __atomic_load_N(iN *ptr, int ordering)
+void __atomic_store_N(iN *ptr, iN val, int ordering)
+iN __atomic_exchange_N(iN *ptr, iN val, int ordering)
+bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired, int success_order, int failure_order)</pre>
+</div>
+<p>Finally there are some read-modify-write functions, which are only available in
+the size-specific variants (any other sizes use a <tt class="docutils literal"><span class="pre">__atomic_compare_exchange</span></tt>
+loop):</p>
+<div class="highlight-python"><pre>iN __atomic_fetch_add_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_sub_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_and_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_or_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_xor_N(iN *ptr, iN val, int ordering)
+iN __atomic_fetch_nand_N(iN *ptr, iN val, int ordering)</pre>
+</div>
+<p>This set of library functions has some interesting implementation requirements
+to take note of:</p>
+<ul class="simple">
+<li>They support all sizes and alignments – including those which cannot be
+implemented natively on any existing hardware. Therefore, they will certainly
+use mutexes for some sizes/alignments.</li>
+<li>As a consequence, they cannot be shipped in a statically linked
+compiler-support library, as they have state which must be shared amongst all
+DSOs loaded in the program. They must be provided in a shared library used by
+all objects.</li>
+<li>The set of atomic sizes supported lock-free must be a superset of the sizes
+any compiler can emit. That is: if a new compiler introduces support for
+inline-lock-free atomics of size N, the <tt class="docutils literal"><span class="pre">__atomic_*</span></tt> functions must also have a
+lock-free implementation for size N. This is a requirement so that code
+produced by an old compiler (which will have called the <tt class="docutils literal"><span class="pre">__atomic_*</span></tt> function)
+interoperates with code produced by the new compiler (which will use the native
+atomic instruction).</li>
+</ul>
+<p>Note that it’s possible to write an entirely target-independent implementation
+of these library functions by using the compiler atomic builtins themselves to
+implement the operations on naturally-aligned pointers of supported sizes, and a
+generic mutex implementation otherwise.</p>
+</div>
+<div class="section" id="libcalls-sync">
+<h2><a class="toc-backref" href="#id18">Libcalls: __sync_*</a><a class="headerlink" href="#libcalls-sync" title="Permalink to this headline">¶</a></h2>
+<p>Some targets or OS/target combinations can support lock-free atomics, but for
+various reasons, it is not practical to emit the instructions inline.</p>
+<p>There are two typical examples of this.</p>
+<p>Some CPUs support multiple instruction sets which can be switched back and forth
+on function-call boundaries. For example, MIPS supports the MIPS16 ISA, which
+has a smaller instruction encoding than the usual MIPS32 ISA. ARM, similarly,
+has the Thumb ISA. In MIPS16 and earlier versions of Thumb, the atomic
+instructions are not encodable. However, those instructions are available via a
+function call to a function with the longer encoding.</p>
+<p>Additionally, a few OS/target pairs provide kernel-supported lock-free
+atomics. ARM/Linux is an example of this: the kernel <a class="reference external" href="https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt">provides</a> a
+function which on older CPUs contains a “magically-restartable” atomic sequence
+(which looks atomic so long as there’s only one CPU), and contains actual atomic
+instructions on newer multicore models. This sort of functionality can typically
+be provided on any architecture, if all CPUs which are missing atomic
+compare-and-swap support are uniprocessor (no SMP). This is almost always the
+case. The only common architecture without that property is SPARC – SPARCV8 SMP
+systems were common, yet it doesn’t support any sort of compare-and-swap
+operation.</p>
+<p>In either of these cases, the Target in LLVM can claim support for atomics of an
+appropriate size, and then implement some subset of the operations via libcalls
+to a <tt class="docutils literal"><span class="pre">__sync_*</span></tt> function. Such functions <em>must</em> not use locks in their
+implementation, because unlike the <tt class="docutils literal"><span class="pre">__atomic_*</span></tt> routines used by
+AtomicExpandPass, these may be mixed-and-matched with native instructions by the
+target lowering.</p>
+<p>Further, these routines do not need to be shared, as they are stateless. So,
+there is no issue with having multiple copies included in one binary. Thus,
+typically these routines are implemented by the statically-linked compiler
+runtime support library.</p>
+<p>LLVM will emit a call to an appropriate <tt class="docutils literal"><span class="pre">__sync_*</span></tt> routine if the target
+ISelLowering code has set the corresponding <tt class="docutils literal"><span class="pre">ATOMIC_CMPXCHG</span></tt>, <tt class="docutils literal"><span class="pre">ATOMIC_SWAP</span></tt>,
+or <tt class="docutils literal"><span class="pre">ATOMIC_LOAD_*</span></tt> operation to “Expand”, and if it has opted-into the
+availability of those library functions via a call to <tt class="docutils literal"><span class="pre">initSyncLibcalls()</span></tt>.</p>
+<p>The full set of functions that may be called by LLVM is (for <tt class="docutils literal"><span class="pre">N</span></tt> being 1, 2,
+4, 8, or 16):</p>
+<div class="highlight-python"><pre>iN __sync_val_compare_and_swap_N(iN *ptr, iN expected, iN desired)
+iN __sync_lock_test_and_set_N(iN *ptr, iN val)
+iN __sync_fetch_and_add_N(iN *ptr, iN val)
+iN __sync_fetch_and_sub_N(iN *ptr, iN val)
+iN __sync_fetch_and_and_N(iN *ptr, iN val)
+iN __sync_fetch_and_or_N(iN *ptr, iN val)
+iN __sync_fetch_and_xor_N(iN *ptr, iN val)
+iN __sync_fetch_and_nand_N(iN *ptr, iN val)
+iN __sync_fetch_and_max_N(iN *ptr, iN val)
+iN __sync_fetch_and_umax_N(iN *ptr, iN val)
+iN __sync_fetch_and_min_N(iN *ptr, iN val)
+iN __sync_fetch_and_umin_N(iN *ptr, iN val)</pre>
+</div>
+<p>This list doesn’t include any function for atomic load or store; all known
+architectures support atomic loads and stores directly (possibly by emitting a
+fence on either side of a normal load or store.)</p>
+<p>There’s also, somewhat separately, the possibility to lower <tt class="docutils literal"><span class="pre">ATOMIC_FENCE</span></tt> to
+<tt class="docutils literal"><span class="pre">__sync_synchronize()</span></tt>. This may happen or not happen independent of all the
+above, controlled purely by <tt class="docutils literal"><span class="pre">setOperationAction(ISD::ATOMIC_FENCE,</span> <span class="pre">...)</span></tt>.</p>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="CodingStandards.html" title="LLVM Coding Standards"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="Docker.html" title="A guide to Dockerfiles for building LLVM"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/Benchmarking.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/Benchmarking.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/Benchmarking.html (added)
+++ www-releases/trunk/7.0.1/docs/Benchmarking.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,181 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Benchmarking tips — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="A guide to Dockerfiles for building LLVM" href="Docker.html" />
+ <link rel="prev" title="Reporting Guide" href="ReportingGuide.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="Docker.html" title="A guide to Dockerfiles for building LLVM"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="ReportingGuide.html" title="Reporting Guide"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="benchmarking-tips">
+<h1>Benchmarking tips<a class="headerlink" href="#benchmarking-tips" title="Permalink to this headline">¶</a></h1>
+<div class="section" id="introduction">
+<h2>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p>For benchmarking a patch we want to reduce all possible sources of
+noise as much as possible. How to do that is very OS dependent.</p>
+<p>Note that low noise is required, but not sufficient. It does not
+exclude measurement bias. See
+<a class="reference external" href="https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf">https://www.cis.upenn.edu/~cis501/papers/producing-wrong-data.pdf</a> for
+example.</p>
+</div>
+<div class="section" id="general">
+<h2>General<a class="headerlink" href="#general" title="Permalink to this headline">¶</a></h2>
+<ul>
+<li><p class="first">Use a high resolution timer, e.g. perf under linux.</p>
+</li>
+<li><p class="first">Run the benchmark multiple times to be able to recognize noise.</p>
+</li>
+<li><p class="first">Disable as many processes or services as possible on the target system.</p>
+</li>
+<li><p class="first">Disable frequency scaling, turbo boost and address space
+randomization (see OS specific section).</p>
+</li>
+<li><p class="first">Static link if the OS supports it. That avoids any variation that
+might be introduced by loading dynamic libraries. This can be done
+by passing <tt class="docutils literal"><span class="pre">-DLLVM_BUILD_STATIC=ON</span></tt> to cmake.</p>
+</li>
+<li><p class="first">Try to avoid storage. On some systems you can use tmpfs. Putting the
+program, inputs and outputs on tmpfs avoids touching a real storage
+system, which can have a pretty big variability.</p>
+<p>To mount it (on linux and freebsd at least):</p>
+<div class="highlight-python"><pre>mount -t tmpfs -o size=<XX>g none dir_to_mount</pre>
+</div>
+</li>
+</ul>
+</div>
+<div class="section" id="linux">
+<h2>Linux<a class="headerlink" href="#linux" title="Permalink to this headline">¶</a></h2>
+<ul>
+<li><p class="first">Disable address space randomization:</p>
+<div class="highlight-python"><pre>echo 0 > /proc/sys/kernel/randomize_va_space</pre>
+</div>
+</li>
+<li><p class="first">Set scaling_governor to performance:</p>
+<div class="highlight-python"><pre>for i in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
+do
+ echo performance > "$i"
+done</pre>
+</div>
+</li>
+<li><p class="first">Use <a class="reference external" href="https://github.com/lpechacek/cpuset">https://github.com/lpechacek/cpuset</a> to reserve cpus for just the
+program you are benchmarking. If using perf, leave at least 2 cores
+so that perf runs in one and your program in another:</p>
+<div class="highlight-python"><pre>cset shield -c N1,N2 -k on</pre>
+</div>
+<p>This will move all threads out of N1 and N2. The <tt class="docutils literal"><span class="pre">-k</span> <span class="pre">on</span></tt> means
+that even kernel threads are moved out.</p>
+</li>
+<li><p class="first">Disable the SMT pair of the cpus you will use for the benchmark. The
+pair of cpu N can be found in
+<tt class="docutils literal"><span class="pre">/sys/devices/system/cpu/cpuN/topology/thread_siblings_list</span></tt> and
+disabled with:</p>
+<div class="highlight-python"><pre>echo 0 > /sys/devices/system/cpu/cpuX/online</pre>
+</div>
+</li>
+<li><p class="first">Run the program with:</p>
+<div class="highlight-python"><pre>cset shield --exec -- perf stat -r 10 <cmd></pre>
+</div>
+<p>This will run the command after <tt class="docutils literal"><span class="pre">--</span></tt> in the isolated cpus. The
+particular perf command runs the <tt class="docutils literal"><span class="pre"><cmd></span></tt> 10 times and reports
+statistics.</p>
+</li>
+</ul>
+<p>With these in place you can expect perf variations of less than 0.1%.</p>
+<div class="section" id="linux-intel">
+<h3>Linux Intel<a class="headerlink" href="#linux-intel" title="Permalink to this headline">¶</a></h3>
+<ul>
+<li><p class="first">Disable turbo mode:</p>
+<div class="highlight-python"><pre>echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo</pre>
+</div>
+</li>
+</ul>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="Docker.html" title="A guide to Dockerfiles for building LLVM"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="ReportingGuide.html" title="Reporting Guide"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
Added: www-releases/trunk/7.0.1/docs/BigEndianNEON.html
URL: http://llvm.org/viewvc/llvm-project/www-releases/trunk/7.0.1/docs/BigEndianNEON.html?rev=349965&view=auto
==============================================================================
--- www-releases/trunk/7.0.1/docs/BigEndianNEON.html (added)
+++ www-releases/trunk/7.0.1/docs/BigEndianNEON.html Fri Dec 21 13:53:02 2018
@@ -0,0 +1,297 @@
+
+
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+
+ <title>Using ARM NEON instructions in big endian mode — LLVM 7 documentation</title>
+
+ <link rel="stylesheet" href="_static/llvm-theme.css" type="text/css" />
+ <link rel="stylesheet" href="_static/pygments.css" type="text/css" />
+
+ <script type="text/javascript">
+ var DOCUMENTATION_OPTIONS = {
+ URL_ROOT: '',
+ VERSION: '7',
+ COLLAPSE_INDEX: false,
+ FILE_SUFFIX: '.html',
+ HAS_SOURCE: true
+ };
+ </script>
+ <script type="text/javascript" src="_static/jquery.js"></script>
+ <script type="text/javascript" src="_static/underscore.js"></script>
+ <script type="text/javascript" src="_static/doctools.js"></script>
+ <link rel="top" title="LLVM 7 documentation" href="index.html" />
+ <link rel="next" title="LLVM Code Coverage Mapping Format" href="CoverageMappingFormat.html" />
+ <link rel="prev" title="Design and Usage of the InAlloca Attribute" href="InAlloca.html" />
+<style type="text/css">
+ table.right { float: right; margin-left: 20px; }
+ table.right td { border: 1px solid #ccc; }
+</style>
+
+ </head>
+ <body>
+<div class="logo">
+ <a href="index.html">
+ <img src="_static/logo.png"
+ alt="LLVM Logo" width="250" height="88"/></a>
+</div>
+
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ accesskey="I">index</a></li>
+ <li class="right" >
+ <a href="CoverageMappingFormat.html" title="LLVM Code Coverage Mapping Format"
+ accesskey="N">next</a> |</li>
+ <li class="right" >
+ <a href="InAlloca.html" title="Design and Usage of the InAlloca Attribute"
+ accesskey="P">previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+
+
+ <div class="document">
+ <div class="documentwrapper">
+ <div class="body">
+
+ <div class="section" id="using-arm-neon-instructions-in-big-endian-mode">
+<h1>Using ARM NEON instructions in big endian mode<a class="headerlink" href="#using-arm-neon-instructions-in-big-endian-mode" title="Permalink to this headline">¶</a></h1>
+<div class="contents local topic" id="contents">
+<ul class="simple">
+<li><a class="reference internal" href="#introduction" id="id3">Introduction</a><ul>
+<li><a class="reference internal" href="#example-c-level-intrinsics-assembly" id="id4">Example: C-level intrinsics -> assembly</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#problem" id="id5">Problem</a></li>
+<li><a class="reference internal" href="#ldr-and-ld1" id="id6"><tt class="docutils literal"><span class="pre">LDR</span></tt> and <tt class="docutils literal"><span class="pre">LD1</span></tt></a></li>
+<li><a class="reference internal" href="#considerations" id="id7">Considerations</a><ul>
+<li><a class="reference internal" href="#llvm-ir-lane-ordering" id="id8">LLVM IR Lane ordering</a></li>
+<li><a class="reference internal" href="#aapcs" id="id9">AAPCS</a></li>
+<li><a class="reference internal" href="#alignment" id="id10">Alignment</a></li>
+<li><a class="reference internal" href="#summary" id="id11">Summary</a></li>
+</ul>
+</li>
+<li><a class="reference internal" href="#implementation" id="id12">Implementation</a><ul>
+<li><a class="reference internal" href="#bitconverts" id="id13">Bitconverts</a></li>
+</ul>
+</li>
+</ul>
+</div>
+<div class="section" id="introduction">
+<h2><a class="toc-backref" href="#id3">Introduction</a><a class="headerlink" href="#introduction" title="Permalink to this headline">¶</a></h2>
+<p>Generating code for big endian ARM processors is for the most part straightforward. NEON loads and stores however have some interesting properties that make code generation decisions less obvious in big endian mode.</p>
+<p>The aim of this document is to explain the problem with NEON loads and stores, and the solution that has been implemented in LLVM.</p>
+<p>In this document the term “vector” refers to what the ARM ABI calls a “short vector”, which is a sequence of items that can fit in a NEON register. This sequence can be 64 or 128 bits in length, and can consist of 8, 16, 32 or 64 bit items. This document refers to A64 instructions throughout, but is almost entirely applicable to the A32/ARMv7 instruction sets as well. The ABI format for passing vectors in A32 is slightly different to A64. Apart from that, the same concepts apply.</p>
+<div class="section" id="example-c-level-intrinsics-assembly">
+<h3><a class="toc-backref" href="#id4">Example: C-level intrinsics -> assembly</a><a class="headerlink" href="#example-c-level-intrinsics-assembly" title="Permalink to this headline">¶</a></h3>
+<p>It may be helpful first to illustrate how C-level ARM NEON intrinsics are lowered to instructions.</p>
+<p>This trivial C function takes a vector of four ints and sets the zero’th lane to the value “42”:</p>
+<div class="highlight-python"><pre>#include &lt;arm_neon.h&gt;
+int32x4_t f(int32x4_t p) {
+ return vsetq_lane_s32(42, p, 0);
+}</pre>
+</div>
+<p>arm_neon.h intrinsics generate “generic” IR where possible (that is, normal IR instructions not <tt class="docutils literal"><span class="pre">llvm.arm.neon.*</span></tt> intrinsic calls). The above generates:</p>
+<div class="highlight-python"><pre>define &lt;4 x i32&gt; @f(&lt;4 x i32&gt; %p) {
+ %vset_lane = insertelement &lt;4 x i32&gt; %p, i32 42, i32 0
+ ret &lt;4 x i32&gt; %vset_lane
+}</pre>
+</div>
+<p>Which then becomes the following trivial assembly:</p>
+<div class="highlight-python"><pre>f: // @f
+ movz w8, #0x2a
+ ins v0.s[0], w8
+ ret</pre>
+</div>
+</div>
+</div>
+<div class="section" id="problem">
+<h2><a class="toc-backref" href="#id5">Problem</a><a class="headerlink" href="#problem" title="Permalink to this headline">¶</a></h2>
+<p>The main problem is how vectors are represented in memory and in registers.</p>
+<p>First, a recap. The “endianness” of an item affects its representation in memory only. In a register, a number is just a sequence of bits - 64 bits in the case of AArch64 general purpose registers. Memory, however, is a sequence of addressable units of 8 bits in size. Any number greater than 8 bits must therefore be split up into 8-bit chunks, and endianness describes the order in which these chunks are laid out in memory.</p>
+<p>A “little endian” layout has the least significant byte first (lowest in memory address). A “big endian” layout has the <em>most</em> significant byte first. This means that when loading an item from big endian memory, the lowest 8-bits in memory must go in the most significant 8-bits, and so forth.</p>
+</div>
+<div class="section" id="ldr-and-ld1">
+<h2><a class="toc-backref" href="#id6"><tt class="docutils literal"><span class="pre">LDR</span></tt> and <tt class="docutils literal"><span class="pre">LD1</span></tt></a><a class="headerlink" href="#ldr-and-ld1" title="Permalink to this headline">¶</a></h2>
+<div class="figure align-right">
+<img alt="_images/ARM-BE-ldr.png" src="_images/ARM-BE-ldr.png" />
+<p class="caption">Big endian vector load using <tt class="docutils literal"><span class="pre">LDR</span></tt>.</p>
+</div>
+<p>A vector is a consecutive sequence of items that are operated on simultaneously. To load a 64-bit vector, 64 bits need to be read from memory. In little endian mode, we can do this by just performing a 64-bit load - <tt class="docutils literal"><span class="pre">LDR</span> <span class="pre">q0,</span> <span class="pre">[foo]</span></tt>. However if we try this in big endian mode, because of the byte swapping the lane indices end up being swapped! The zero’th item as laid out in memory becomes the n’th lane in the vector.</p>
+<div class="figure align-right">
+<img alt="_images/ARM-BE-ld1.png" src="_images/ARM-BE-ld1.png" />
+<p class="caption">Big endian vector load using <tt class="docutils literal"><span class="pre">LD1</span></tt>. Note that the lanes retain the correct ordering.</p>
+</div>
+<p>Because of this, the instruction <tt class="docutils literal"><span class="pre">LD1</span></tt> performs a vector load but performs byte swapping not on the entire 64 bits, but on the individual items within the vector. This means that the register content is the same as it would have been on a little endian system.</p>
+<p>It may seem that <tt class="docutils literal"><span class="pre">LD1</span></tt> should suffice to perform vector loads on a big endian machine. However there are pros and cons to the two approaches that make it less than simple to decide which register format to pick.</p>
+<p>There are two options:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>The content of a vector register is the same <em>as if</em> it had been loaded with an <tt class="docutils literal"><span class="pre">LDR</span></tt> instruction.</li>
+<li>The content of a vector register is the same <em>as if</em> it had been loaded with an <tt class="docutils literal"><span class="pre">LD1</span></tt> instruction.</li>
+</ol>
+</div></blockquote>
+<p>Because <tt class="docutils literal"><span class="pre">LD1</span> <span class="pre">==</span> <span class="pre">LDR</span> <span class="pre">+</span> <span class="pre">REV</span></tt> and similarly <tt class="docutils literal"><span class="pre">LDR</span> <span class="pre">==</span> <span class="pre">LD1</span> <span class="pre">+</span> <span class="pre">REV</span></tt> (on a big endian system), we can simulate either type of load with the other type of load plus a <tt class="docutils literal"><span class="pre">REV</span></tt> instruction. So we’re not deciding which instructions to use, but which format to use (which will then influence which instruction is best to use).</p>
+<div class="clearer container">
+Note that throughout this section we only mention loads. Stores have exactly the same problems as their associated loads, so have been skipped for brevity.</div>
+</div>
+<div class="section" id="considerations">
+<h2><a class="toc-backref" href="#id7">Considerations</a><a class="headerlink" href="#considerations" title="Permalink to this headline">¶</a></h2>
+<div class="section" id="llvm-ir-lane-ordering">
+<h3><a class="toc-backref" href="#id8">LLVM IR Lane ordering</a><a class="headerlink" href="#llvm-ir-lane-ordering" title="Permalink to this headline">¶</a></h3>
+<p>LLVM IR has first class vector types. In LLVM IR, the zero’th element of a vector resides at the lowest memory address. The optimizer relies on this property in certain areas, for example when concatenating vectors together. The intention is for arrays and vectors to have identical memory layouts - <tt class="docutils literal"><span class="pre">[4</span> <span class="pre">x</span> <span class="pre">i8]</span></tt> and <tt class="docutils literal"><span class="pre">&lt;4</span> <span class="pre">x</span> <span class="pre">i8&gt;</span></tt> should be represented the same in memory. Without this property there would be many special cases that the optimizer would have to cleverly handle.</p>
+<p>Use of <tt class="docutils literal"><span class="pre">LDR</span></tt> would break this lane ordering property. This doesn’t preclude the use of <tt class="docutils literal"><span class="pre">LDR</span></tt>, but we would have to do one of two things:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Insert a <tt class="docutils literal"><span class="pre">REV</span></tt> instruction to reverse the lane order after every <tt class="docutils literal"><span class="pre">LDR</span></tt>.</li>
+<li>Disable all optimizations that rely on lane layout, and for every access to an individual lane (<tt class="docutils literal"><span class="pre">insertelement</span></tt>/<tt class="docutils literal"><span class="pre">extractelement</span></tt>/<tt class="docutils literal"><span class="pre">shufflevector</span></tt>) reverse the lane index.</li>
+</ol>
+</div></blockquote>
+</div>
+<div class="section" id="aapcs">
+<h3><a class="toc-backref" href="#id9">AAPCS</a><a class="headerlink" href="#aapcs" title="Permalink to this headline">¶</a></h3>
+<p>The ARM procedure call standard (AAPCS) defines the ABI for passing vectors between functions in registers. It states:</p>
+<blockquote>
+<div><p>When a short vector is transferred between registers and memory it is treated as an opaque object. That is a short vector is stored in memory as if it were stored with a single <tt class="docutils literal"><span class="pre">STR</span></tt> of the entire register; a short vector is loaded from memory using the corresponding <tt class="docutils literal"><span class="pre">LDR</span></tt> instruction. On a little-endian system this means that element 0 will always contain the lowest addressed element of a short vector; on a big-endian system element 0 will contain the highest-addressed element of a short vector.</p>
+<p class="attribution">—Procedure Call Standard for the ARM 64-bit Architecture (AArch64), 4.1.2 Short Vectors</p>
+</div></blockquote>
+<p>The use of <tt class="docutils literal"><span class="pre">LDR</span></tt> and <tt class="docutils literal"><span class="pre">STR</span></tt> as the ABI defines has at least one advantage over <tt class="docutils literal"><span class="pre">LD1</span></tt> and <tt class="docutils literal"><span class="pre">ST1</span></tt>. <tt class="docutils literal"><span class="pre">LDR</span></tt> and <tt class="docutils literal"><span class="pre">STR</span></tt> are oblivious to the size of the individual lanes of a vector. <tt class="docutils literal"><span class="pre">LD1</span></tt> and <tt class="docutils literal"><span class="pre">ST1</span></tt> are not - the lane size is encoded within them. This is important across an ABI boundary, because it would become necessary to know the lane width the callee expects. Consider the following code:</p>
+<div class="highlight-c"><div class="highlight"><pre><span class="o">&lt;</span><span class="n">callee</span><span class="p">.</span><span class="n">c</span><span class="o">&gt;</span>
+<span class="kt">void</span> <span class="n">callee</span><span class="p">(</span><span class="n">uint32x2_t</span> <span class="n">v</span><span class="p">)</span> <span class="p">{</span>
+ <span class="p">...</span>
+<span class="p">}</span>
+
+<span class="o">&lt;</span><span class="n">caller</span><span class="p">.</span><span class="n">c</span><span class="o">&gt;</span>
+<span class="k">extern</span> <span class="kt">void</span> <span class="n">callee</span><span class="p">(</span><span class="n">uint32x2_t</span><span class="p">);</span>
+<span class="kt">void</span> <span class="nf">caller</span><span class="p">()</span> <span class="p">{</span>
+ <span class="n">callee</span><span class="p">(...);</span>
+<span class="p">}</span>
+</pre></div>
+</div>
+<p>If <tt class="docutils literal"><span class="pre">callee</span></tt> changed its signature to <tt class="docutils literal"><span class="pre">uint16x4_t</span></tt>, which is equivalent in register content, then passing the vector in <tt class="docutils literal"><span class="pre">LD1</span></tt> format would break this code until <tt class="docutils literal"><span class="pre">caller</span></tt> was updated and recompiled.</p>
+<p>There is an argument that if the signatures of the two functions are different then the behaviour should be undefined. But there may be functions that are agnostic to the lane layout of the vector, and treating the vector as an opaque value (just loading it and storing it) would be impossible without a common format across ABI boundaries.</p>
+<p>So to preserve ABI compatibility, we need to use the <tt class="docutils literal"><span class="pre">LDR</span></tt> lane layout across function calls.</p>
+</div>
+<div class="section" id="alignment">
+<h3><a class="toc-backref" href="#id10">Alignment</a><a class="headerlink" href="#alignment" title="Permalink to this headline">¶</a></h3>
+<p>In strict alignment mode, <tt class="docutils literal"><span class="pre">LDR</span> <span class="pre">qX</span></tt> requires its address to be 128-bit aligned, whereas <tt class="docutils literal"><span class="pre">LD1</span></tt> only requires it to be as aligned as the lane size. If we canonicalised on using <tt class="docutils literal"><span class="pre">LDR</span></tt>, we’d still need to use <tt class="docutils literal"><span class="pre">LD1</span></tt> in some places to avoid alignment faults (the result of the <tt class="docutils literal"><span class="pre">LD1</span></tt> would then need to be reversed with <tt class="docutils literal"><span class="pre">REV</span></tt>).</p>
+<p>Most operating systems however do not run with alignment faults enabled, so this is often not an issue.</p>
+</div>
+<div class="section" id="summary">
+<h3><a class="toc-backref" href="#id11">Summary</a><a class="headerlink" href="#summary" title="Permalink to this headline">¶</a></h3>
+<p>The following table summarises the instructions that are required to be emitted for each property mentioned above for each of the two solutions.</p>
+<table border="1" class="docutils">
+<colgroup>
+<col width="37%" />
+<col width="37%" />
+<col width="25%" />
+</colgroup>
+<thead valign="bottom">
+<tr class="row-odd"><th class="head"> </th>
+<th class="head"><tt class="docutils literal"><span class="pre">LDR</span></tt> layout</th>
+<th class="head"><tt class="docutils literal"><span class="pre">LD1</span></tt> layout</th>
+</tr>
+</thead>
+<tbody valign="top">
+<tr class="row-even"><td>Lane ordering</td>
+<td><tt class="docutils literal"><span class="pre">LDR</span> <span class="pre">+</span> <span class="pre">REV</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">LD1</span></tt></td>
+</tr>
+<tr class="row-odd"><td>AAPCS</td>
+<td><tt class="docutils literal"><span class="pre">LDR</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">LD1</span> <span class="pre">+</span> <span class="pre">REV</span></tt></td>
+</tr>
+<tr class="row-even"><td>Alignment for strict mode</td>
+<td><tt class="docutils literal"><span class="pre">LDR</span></tt> / <tt class="docutils literal"><span class="pre">LD1</span> <span class="pre">+</span> <span class="pre">REV</span></tt></td>
+<td><tt class="docutils literal"><span class="pre">LD1</span></tt></td>
+</tr>
+</tbody>
+</table>
+<p>Neither approach is perfect, and choosing one boils down to choosing the lesser of two evils. Addressing the lane ordering issue, it was decided, would require changing target-agnostic compiler passes and would result in a strange IR in which lane indices were reversed. It was decided that this was worse than the changes that would have to be made to support <tt class="docutils literal"><span class="pre">LD1</span></tt>, so <tt class="docutils literal"><span class="pre">LD1</span></tt> was chosen as the canonical vector load instruction (and by inference, <tt class="docutils literal"><span class="pre">ST1</span></tt> for vector stores).</p>
+</div>
+</div>
+<div class="section" id="implementation">
+<h2><a class="toc-backref" href="#id12">Implementation</a><a class="headerlink" href="#implementation" title="Permalink to this headline">¶</a></h2>
+<p>There are 3 parts to the implementation:</p>
+<blockquote>
+<div><ol class="arabic simple">
+<li>Predicate <tt class="docutils literal"><span class="pre">LDR</span></tt> and <tt class="docutils literal"><span class="pre">STR</span></tt> instructions so that they are never allowed to be selected to generate vector loads and stores. The exception is one-lane vectors <a class="footnote-reference" href="#id2" id="id1">[1]</a> - these by definition cannot have lane ordering problems so are fine to use <tt class="docutils literal"><span class="pre">LDR</span></tt>/<tt class="docutils literal"><span class="pre">STR</span></tt>.</li>
+<li>Create code generation patterns for bitconverts that create <tt class="docutils literal"><span class="pre">REV</span></tt> instructions.</li>
+<li>Make sure appropriate bitconverts are created so that vector values get passed over call boundaries as 1-element vectors (which is the same as if they were loaded with <tt class="docutils literal"><span class="pre">LDR</span></tt>).</li>
+</ol>
+</div></blockquote>
+<div class="section" id="bitconverts">
+<h3><a class="toc-backref" href="#id13">Bitconverts</a><a class="headerlink" href="#bitconverts" title="Permalink to this headline">¶</a></h3>
+<img alt="_images/ARM-BE-bitcastfail.png" class="align-right" src="_images/ARM-BE-bitcastfail.png" />
+<p>The main problem with the <tt class="docutils literal"><span class="pre">LD1</span></tt> solution is dealing with bitconverts (or bitcasts, or reinterpret casts). These are pseudo instructions that only change the compiler’s interpretation of data, not the underlying data itself. A requirement is that if data is loaded and then saved again (called a “round trip”), the memory contents should be the same after the store as before the load. If a vector is loaded and is then bitconverted to a different vector type before storing, the round trip will currently be broken.</p>
+<p>Take for example this code sequence:</p>
+<div class="highlight-python"><pre>%0 = load &lt;4 x i32&gt; %x
+%1 = bitcast &lt;4 x i32&gt; %0 to &lt;2 x i64&gt;
+ store &lt;2 x i64&gt; %1, &lt;2 x i64&gt;* %y</pre>
+</div>
+<p>This would produce a code sequence such as that in the figure on the right. The mismatched <tt class="docutils literal"><span class="pre">LD1</span></tt> and <tt class="docutils literal"><span class="pre">ST1</span></tt> cause the stored data to differ from the loaded data.</p>
+<div class="clearer container">
+When we see a bitcast from type <tt class="docutils literal"><span class="pre">X</span></tt> to type <tt class="docutils literal"><span class="pre">Y</span></tt>, what we need to do is to change the in-register representation of the data to be <em>as if</em> it had just been loaded by a <tt class="docutils literal"><span class="pre">LD1</span></tt> of type <tt class="docutils literal"><span class="pre">Y</span></tt>.</div>
+<img alt="_images/ARM-BE-bitcastsuccess.png" class="align-right" src="_images/ARM-BE-bitcastsuccess.png" />
+<p>Conceptually this is simple - we can insert a <tt class="docutils literal"><span class="pre">REV</span></tt> undoing the <tt class="docutils literal"><span class="pre">LD1</span></tt> of type <tt class="docutils literal"><span class="pre">X</span></tt> (converting the in-register representation to the same as if it had been loaded by <tt class="docutils literal"><span class="pre">LDR</span></tt>) and then insert another <tt class="docutils literal"><span class="pre">REV</span></tt> to change the representation to be as if it had been loaded by an <tt class="docutils literal"><span class="pre">LD1</span></tt> of type <tt class="docutils literal"><span class="pre">Y</span></tt>.</p>
+<p>For the previous example, this would be:</p>
+<div class="highlight-python"><pre>LD1 v0.4s, [x]
+
+REV64 v0.4s, v0.4s // There is no REV128 instruction, so it must be synthesized
+EXT v0.16b, v0.16b, v0.16b, #8 // with a REV64 then an EXT to swap the two 64-bit elements.
+
+REV64 v0.2d, v0.2d
+EXT v0.16b, v0.16b, v0.16b, #8
+
+ST1 v0.2d, [y]</pre>
+</div>
+<p>It turns out that these <tt class="docutils literal"><span class="pre">REV</span></tt> pairs can, in almost all cases, be squashed together into a single <tt class="docutils literal"><span class="pre">REV</span></tt>. For the example above, a <tt class="docutils literal"><span class="pre">REV128</span> <span class="pre">4s</span></tt> + <tt class="docutils literal"><span class="pre">REV128</span> <span class="pre">2d</span></tt> is actually a <tt class="docutils literal"><span class="pre">REV64</span> <span class="pre">4s</span></tt>, as shown in the figure on the right.</p>
+<table class="docutils footnote" frame="void" id="id2" rules="none">
+<colgroup><col class="label" /><col /></colgroup>
+<tbody valign="top">
+<tr><td class="label"><a class="fn-backref" href="#id1">[1]</a></td><td>One lane vectors may seem useless as a concept but they serve to distinguish between values held in general purpose registers and values held in NEON/VFP registers. For example, an <tt class="docutils literal"><span class="pre">i64</span></tt> would live in an <tt class="docutils literal"><span class="pre">x</span></tt> register, but <tt class="docutils literal"><span class="pre">&lt;1</span> <span class="pre">x</span> <span class="pre">i64&gt;</span></tt> would live in a <tt class="docutils literal"><span class="pre">d</span></tt> register.</td></tr>
+</tbody>
+</table>
+</div>
+</div>
+</div>
+
+
+ </div>
+ </div>
+ <div class="clearer"></div>
+ </div>
+ <div class="related">
+ <h3>Navigation</h3>
+ <ul>
+ <li class="right" style="margin-right: 10px">
+ <a href="genindex.html" title="General Index"
+ >index</a></li>
+ <li class="right" >
+ <a href="CoverageMappingFormat.html" title="LLVM Code Coverage Mapping Format"
+ >next</a> |</li>
+ <li class="right" >
+ <a href="InAlloca.html" title="Design and Usage of the InAlloca Attribute"
+ >previous</a> |</li>
+ <li><a href="http://llvm.org/">LLVM Home</a> | </li>
+ <li><a href="index.html">Documentation</a>»</li>
+
+ </ul>
+ </div>
+ <div class="footer">
+ © Copyright 2003-2018, LLVM Project.
+ Last updated on 2018-12-21.
+ Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> 1.1.3.
+ </div>
+ </body>
+</html>
\ No newline at end of file
More information about the llvm-commits
mailing list