[llvm] df0c4bf - AMDGPU: Add some baseline immediate encoding test changes

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 14 10:37:53 PDT 2020


Author: Matt Arsenault
Date: 2020-06-14T13:29:35-04:00
New Revision: df0c4bfc959838b67a020057cc1dc96e02192806

URL: https://github.com/llvm/llvm-project/commit/df0c4bfc959838b67a020057cc1dc96e02192806
DIFF: https://github.com/llvm/llvm-project/commit/df0c4bfc959838b67a020057cc1dc96e02192806.diff

LOG: AMDGPU: Add some baseline immediate encoding test changes

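Add instruction encoding checks (by passing -show-mc-encoding to llc), add a gfx1010 run line checked under the GFX10 prefix, and add a few new cases.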

Added: 
    

Modified: 
    llvm/test/CodeGen/AMDGPU/imm16.ll
    llvm/test/CodeGen/AMDGPU/immv216.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll
index 57151afa75cf..5c7ad536bbbf 100644
--- a/llvm/test/CodeGen/AMDGPU/imm16.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm16.ll
@@ -1,19 +1,31 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=GFX10 %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 
 ; FIXME: Merge into imm.ll
 
 define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_neg_0.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_neg_0.0_i16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_neg_0.0_i16:
 ; SI:       ; %bb.0:
@@ -29,15 +41,26 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out)
 }
 
 define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_0.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_0.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_0.0_f16:
 ; SI:       ; %bb.0:
@@ -53,15 +76,26 @@ define amdgpu_kernel void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_imm_neg_0.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_imm_neg_0.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_imm_neg_0.0_f16:
 ; SI:       ; %bb.0:
@@ -77,15 +111,26 @@ define amdgpu_kernel void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_0.5_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_0.5_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x3800
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_0.5_f16:
 ; SI:       ; %bb.0:
@@ -101,15 +146,26 @@ define amdgpu_kernel void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_m_0.5_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_m_0.5_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_m_0.5_f16:
 ; SI:       ; %bb.0:
@@ -125,15 +181,26 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_1.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_1.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_1.0_f16:
 ; SI:       ; %bb.0:
@@ -149,15 +216,26 @@ define amdgpu_kernel void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_m_1.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_m_1.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_m_1.0_f16:
 ; SI:       ; %bb.0:
@@ -173,15 +251,26 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_2.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_2.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x4000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_2.0_f16:
 ; SI:       ; %bb.0:
@@ -197,15 +286,26 @@ define amdgpu_kernel void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_m_2.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_m_2.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_m_2.0_f16:
 ; SI:       ; %bb.0:
@@ -221,15 +321,26 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_4.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_4.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x4400
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_4.0_f16:
 ; SI:       ; %bb.0:
@@ -245,15 +356,26 @@ define amdgpu_kernel void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_m_4.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_m_4.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_m_4.0_f16:
 ; SI:       ; %bb.0:
@@ -269,15 +391,26 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_inv_2pi_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_inv_2pi_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x3118
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_inv_2pi_f16:
 ; SI:       ; %bb.0:
@@ -293,15 +426,26 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out)
 }
 
 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_inline_imm_m_inv_2pi_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_inline_imm_m_inv_2pi_f16:
 ; SI:       ; %bb.0:
@@ -317,15 +461,26 @@ define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %ou
 }
 
 define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
+; GFX10-LABEL: store_literal_imm_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: store_literal_imm_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: store_literal_imm_f16:
 ; SI:       ; %bb.0:
@@ -341,16 +496,29 @@ define amdgpu_kernel void @store_literal_imm_f16(half addrspace(1)* %out) {
 }
 
 define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_0.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_0.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x00,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_0.0_f16:
 ; SI:       ; %bb.0:
@@ -370,16 +538,29 @@ define amdgpu_kernel void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_0.5_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_0.5_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 0.5
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe0,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_0.5_f16:
 ; SI:       ; %bb.0:
@@ -399,16 +580,29 @@ define amdgpu_kernel void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_neg_0.5_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_0.5_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, -0.5
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe2,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_0.5_f16:
 ; SI:       ; %bb.0:
@@ -428,16 +622,29 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, h
 }
 
 define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_1.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_1.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 1.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe4,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_1.0_f16:
 ; SI:       ; %bb.0:
@@ -457,16 +664,29 @@ define amdgpu_kernel void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_neg_1.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_1.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, -1.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe6,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_1.0_f16:
 ; SI:       ; %bb.0:
@@ -486,16 +706,29 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, h
 }
 
 define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_2.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_2.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 2.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe8,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_2.0_f16:
 ; SI:       ; %bb.0:
@@ -515,16 +748,29 @@ define amdgpu_kernel void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_neg_2.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_2.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, -2.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xea,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_2.0_f16:
 ; SI:       ; %bb.0:
@@ -544,16 +790,29 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, h
 }
 
 define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_4.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_4.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 4.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xec,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_4.0_f16:
 ; SI:       ; %bb.0:
@@ -573,16 +832,29 @@ define amdgpu_kernel void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_neg_4.0_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_4.0_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, -4.0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xee,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_4.0_f16:
 ; SI:       ; %bb.0:
@@ -602,23 +874,42 @@ define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, h
 }
 
 define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+; GFX10-LABEL: commute_add_inline_imm_0.5_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
+; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
+; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
+; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
+; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
+; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
+; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64]
+; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: commute_add_inline_imm_0.5_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s0, s4
-; VI-NEXT:    s_mov_b32 s1, s5
-; VI-NEXT:    s_mov_b32 s4, s6
-; VI-NEXT:    s_mov_b32 s5, s7
-; VI-NEXT:    s_mov_b32 s6, s2
-; VI-NEXT:    s_mov_b32 s7, s3
-; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
+; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
+; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
+; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
+; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
+; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
+; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
+; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: commute_add_inline_imm_0.5_f16:
 ; SI:       ; %bb.0:
@@ -646,23 +937,42 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %ou
 }
 
 define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+; GFX10-LABEL: commute_add_literal_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
+; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
+; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
+; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
+; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
+; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
+; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00]
+; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: commute_add_literal_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s0, s4
-; VI-NEXT:    s_mov_b32 s1, s5
-; VI-NEXT:    s_mov_b32 s4, s6
-; VI-NEXT:    s_mov_b32 s5, s7
-; VI-NEXT:    s_mov_b32 s6, s2
-; VI-NEXT:    s_mov_b32 s7, s3
-; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
+; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
+; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
+; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
+; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
+; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
+; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
+; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: commute_add_literal_f16:
 ; SI:       ; %bb.0:
@@ -690,16 +1000,29 @@ define amdgpu_kernel void @commute_add_literal_f16(half addrspace(1)* %out, half
 }
 
 define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_1_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_1_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 1
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x02,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_1_f16:
 ; SI:       ; %bb.0:
@@ -719,16 +1042,29 @@ define amdgpu_kernel void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x
 }
 
 define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_2_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_2_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 2
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x04,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_2_f16:
 ; SI:       ; %bb.0:
@@ -748,16 +1084,29 @@ define amdgpu_kernel void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x
 }
 
 define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_16_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_16_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 16
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x20,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_16_f16:
 ; SI:       ; %bb.0:
@@ -777,23 +1126,42 @@ define amdgpu_kernel void @add_inline_imm_16_f16(half addrspace(1)* %out, half %
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
+; GFX10-LABEL: add_inline_imm_neg_1_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
+; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
+; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
+; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
+; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
+; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
+; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, -1 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x83,0x01,0x00]
+; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_1_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s0, s4
-; VI-NEXT:    s_mov_b32 s1, s5
-; VI-NEXT:    s_mov_b32 s4, s6
-; VI-NEXT:    s_mov_b32 s5, s7
-; VI-NEXT:    s_mov_b32 s6, s2
-; VI-NEXT:    s_mov_b32 s7, s3
-; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_u16_e32 v0, -1, v0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
+; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
+; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
+; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
+; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
+; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
+; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
+; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; VI-NEXT:    v_add_u16_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4c]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_1_f16:
 ; SI:       ; %bb.0:
@@ -820,23 +1188,42 @@ define amdgpu_kernel void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, i16
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
+; GFX10-LABEL: add_inline_imm_neg_2_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
+; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
+; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
+; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
+; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
+; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
+; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, -2 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0x85,0x01,0x00]
+; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_2_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s0, s4
-; VI-NEXT:    s_mov_b32 s1, s5
-; VI-NEXT:    s_mov_b32 s4, s6
-; VI-NEXT:    s_mov_b32 s5, s7
-; VI-NEXT:    s_mov_b32 s6, s2
-; VI-NEXT:    s_mov_b32 s7, s3
-; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_u16_e32 v0, -2, v0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
+; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
+; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
+; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
+; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
+; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
+; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
+; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; VI-NEXT:    v_add_u16_e32 v0, -2, v0 ; encoding: [0xc2,0x00,0x00,0x4c]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_2_f16:
 ; SI:       ; %bb.0:
@@ -863,23 +1250,42 @@ define amdgpu_kernel void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, i16
 }
 
 define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i16 addrspace(1)* %in) {
+; GFX10-LABEL: add_inline_imm_neg_16_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe]
+; GFX10-NEXT:    s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe]
+; GFX10-NEXT:    s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe]
+; GFX10-NEXT:    s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe]
+; GFX10-NEXT:    s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe]
+; GFX10-NEXT:    buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80]
+; GFX10-NEXT:    s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe]
+; GFX10-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf]
+; GFX10-NEXT:    v_add_nc_u16_e64 v0, v0, -16 ; encoding: [0x00,0x00,0x03,0xd7,0x00,0xa1,0x01,0x00]
+; GFX10-NEXT:    buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_neg_16_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_mov_b32 s0, s4
-; VI-NEXT:    s_mov_b32 s1, s5
-; VI-NEXT:    s_mov_b32 s4, s6
-; VI-NEXT:    s_mov_b32 s5, s7
-; VI-NEXT:    s_mov_b32 s6, s2
-; VI-NEXT:    s_mov_b32 s7, s3
-; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_add_u16_e32 v0, -16, v0
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0 ; encoding: [0x02,0x01,0x0a,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    s_mov_b32 s0, s4 ; encoding: [0x04,0x00,0x80,0xbe]
+; VI-NEXT:    s_mov_b32 s1, s5 ; encoding: [0x05,0x00,0x81,0xbe]
+; VI-NEXT:    s_mov_b32 s4, s6 ; encoding: [0x06,0x00,0x84,0xbe]
+; VI-NEXT:    s_mov_b32 s5, s7 ; encoding: [0x07,0x00,0x85,0xbe]
+; VI-NEXT:    s_mov_b32 s6, s2 ; encoding: [0x02,0x00,0x86,0xbe]
+; VI-NEXT:    s_mov_b32 s7, s3 ; encoding: [0x03,0x00,0x87,0xbe]
+; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x01,0x80]
+; VI-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
+; VI-NEXT:    v_add_u16_e32 v0, -16, v0 ; encoding: [0xd0,0x00,0x00,0x4c]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_neg_16_f16:
 ; SI:       ; %bb.0:
@@ -906,16 +1312,29 @@ define amdgpu_kernel void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, i1
 }
 
 define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_63_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_63_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 63
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x7e,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_63_f16:
 ; SI:       ; %bb.0:
@@ -935,16 +1354,29 @@ define amdgpu_kernel void @add_inline_imm_63_f16(half addrspace(1)* %out, half %
 }
 
 define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
+; GFX10-LABEL: add_inline_imm_64_f16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf]
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa]
+; GFX10-NEXT:    s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf]
+; GFX10-NEXT:    v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00]
+; GFX10-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe]
+; GFX10-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; GFX10-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
+;
 ; VI-LABEL: add_inline_imm_64_f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; VI-NEXT:    s_load_dword s4, s[4:5], 0x8
-; VI-NEXT:    s_mov_b32 s3, 0x1100f000
-; VI-NEXT:    s_mov_b32 s2, -1
-; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_add_f16_e64 v0, s4, 64
-; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
-; VI-NEXT:    s_endpgm
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x06,0xc0,0x00,0x00,0x00,0x00]
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x8 ; encoding: [0x02,0x01,0x02,0xc0,0x08,0x00,0x00,0x00]
+; VI-NEXT:    s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11]
+; VI-NEXT:    s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe]
+; VI-NEXT:    s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf]
+; VI-NEXT:    v_add_f16_e64 v0, s4, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x80,0x01,0x00]
+; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80]
+; VI-NEXT:    s_endpgm ; encoding: [0x00,0x00,0x81,0xbf]
 ;
 ; SI-LABEL: add_inline_imm_64_f16:
 ; SI:       ; %bb.0:
@@ -962,3 +1394,329 @@ define amdgpu_kernel void @add_inline_imm_64_f16(half addrspace(1)* %out, half %
   store half %y, half addrspace(1)* %out
   ret void
 }
+
+; FIXME: This should be emitted as a literal constant, since the 16-bit
+; float inline immediates do not work for 16-bit integer operations.
+define void @mul_inline_imm_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_0.5_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 0.5 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe1,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_0.5_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0.5, v2 ; encoding: [0xf0,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_0.5_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3800, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half 0.5 to i16) ; integer multiply by half 0.5 reinterpreted as i16 (0x3800)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_neg_0.5_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_neg_0.5_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 0xffffb800 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe3,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_neg_0.5_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffb800, v2 ; encoding: [0xf1,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_neg_0.5_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xb800, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half -0.5 to i16) ; integer multiply by half -0.5 reinterpreted as i16 (0xb800)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_1.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 1.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe5,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_1.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 1.0, v2 ; encoding: [0xf2,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_1.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3c00, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half 1.0 to i16) ; integer multiply by half 1.0 reinterpreted as i16 (0x3c00)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_neg_1.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_neg_1.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 0xffffbc00 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xe7,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_neg_1.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffbc00, v2 ; encoding: [0xf3,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_neg_1.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xbc00, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half -1.0 to i16) ; integer multiply by half -1.0 reinterpreted as i16 (0xbc00)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @shl_inline_imm_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: shl_inline_imm_2.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xe9,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: shl_inline_imm_2.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, 2.0 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xe9,0x01,0x00]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: shl_inline_imm_2.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_lshl_b32_e32 v2, 0x4000, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = shl i16 bitcast (half 2.0 to i16), %x ; shifted value is half 2.0 reinterpreted as i16 (0x4000); %x is the shift amount
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @shl_inline_imm_neg_2.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: shl_inline_imm_neg_2.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xeb,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: shl_inline_imm_neg_2.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_lshlrev_b16_e64 v2, v2, 0xffffc000 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0xeb,0x01,0x00]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: shl_inline_imm_neg_2.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_lshl_b32_e32 v2, 0xffffc000, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = shl i16 bitcast (half -2.0 to i16), %x ; shifted value is half -2.0 reinterpreted as i16 (0xc000); %x is the shift amount
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_4.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 4.0 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xed,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_4.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 4.0, v2 ; encoding: [0xf6,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_4.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x4400, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half 4.0 to i16) ; integer multiply by half 4.0 reinterpreted as i16 (0x4400)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_neg_4.0_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_neg_4.0_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 0xffffc400 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xef,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_neg_4.0_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0xffffc400, v2 ; encoding: [0xf7,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_neg_4.0_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0xc400, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half -4.0 to i16) ; integer multiply by half -4.0 reinterpreted as i16 (0xc400)
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}
+
+define void @mul_inline_imm_inv2pi_i16(i16 addrspace(1)* %out, i16 %x) {
+; GFX10-LABEL: mul_inline_imm_inv2pi_i16:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    v_mul_lo_u16_e64 v2, v2, 0.15915494 ; encoding: [0x02,0x00,0x05,0xd7,0x02,0xf1,0x01,0x00]
+; GFX10-NEXT:    ; implicit-def: $vcc_hi
+; GFX10-NEXT:    global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00]
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb]
+; GFX10-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
+;
+; VI-LABEL: mul_inline_imm_inv2pi_i16:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+; VI-NEXT:    v_mul_lo_u16_e32 v2, 0.15915494, v2 ; encoding: [0xf8,0x04,0x04,0x52]
+; VI-NEXT:    flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00]
+; VI-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0) ; encoding: [0x70,0x00,0x8c,0xbf]
+; VI-NEXT:    s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
+;
+; SI-LABEL: mul_inline_imm_inv2pi_i16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    v_mul_u32_u24_e32 v2, 0x3118, v2
+; SI-NEXT:    buffer_store_short v2, v[0:1], s[4:7], 0 addr64
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
+; SI-NEXT:    s_setpc_b64 s[30:31]
+  %y = mul i16 %x, bitcast (half 0xH3118 to i16) ; integer multiply by 0x3118, the half pattern the GFX10/VI checks print as 0.15915494
+  store i16 %y, i16 addrspace(1)* %out
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index 77cc717e057b..9b2f211faaee 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -1,10 +1,11 @@
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
-; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
+; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; FIXME: Merge into imm.ll
 
 ; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)* %out) #0 {
   store <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> addrspace(1)* %out
@@ -12,7 +13,7 @@ define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(<2 x i16> addrspace(1)
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0.0, half 0.0>, <2 x half> addrspace(1)* %out
@@ -20,7 +21,7 @@ define amdgpu_kernel void @store_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -0.0, half -0.0>, <2 x half> addrspace(1)* %out
@@ -28,7 +29,7 @@ define amdgpu_kernel void @store_imm_neg_0.0_v2f16(<2 x half> addrspace(1)* %out
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0.5, half 0.5>, <2 x half> addrspace(1)* %out
@@ -36,7 +37,7 @@ define amdgpu_kernel void @store_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -0.5, half -0.5>, <2 x half> addrspace(1)* %out
@@ -44,7 +45,7 @@ define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 1.0, half 1.0>, <2 x half> addrspace(1)* %out
@@ -52,7 +53,7 @@ define amdgpu_kernel void @store_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -1.0, half -1.0>, <2 x half> addrspace(1)* %out
@@ -60,7 +61,7 @@ define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 2.0, half 2.0>, <2 x half> addrspace(1)* %out
@@ -68,7 +69,7 @@ define amdgpu_kernel void @store_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -2.0, half -2.0>, <2 x half> addrspace(1)* %out
@@ -76,7 +77,7 @@ define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 4.0, half 4.0>, <2 x half> addrspace(1)* %out
@@ -84,7 +85,7 @@ define amdgpu_kernel void @store_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half -4.0, half -4.0>, <2 x half> addrspace(1)* %out
@@ -92,7 +93,7 @@ define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(<2 x half> addrspace(1)*
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0xH3118, half 0xH3118>, <2 x half> addrspace(1)* %out
@@ -100,7 +101,7 @@ define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(<2 x half> addrspace(1
 }
 
 ; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
-; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118{{$}}
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
 ; GCN: buffer_store_dword [[REG]]
 define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(<2 x half> addrspace(1)* %out) #0 {
   store <2 x half> <half 0xHB118, half 0xHB118>, <2 x half> addrspace(1)* %out
@@ -117,7 +118,7 @@ define amdgpu_kernel void @store_literal_imm_v2f16(<2 x half> addrspace(1)* %out
 
 ; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0 ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -137,8 +138,12 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
+; GFX10: s_load_dword [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe0,0x01,0x08]
+; GFX10: buffer_store_dword [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe0,0x01,0x08]
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -158,8 +163,12 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou
 }
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
+; GFX10: s_load_dword [[VAL:s[0-9]+]]
+; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe2,0x01,0x08]
+; GFX10: buffer_store_dword [[REG]]
+
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe2,0x01,0x08]
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -180,7 +189,7 @@ define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -201,7 +210,7 @@ define amdgpu_kernel void @add_inline_imm_1.0_v2f16(<2 x half> addrspace(1)* %ou
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 
@@ -223,7 +232,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -244,7 +253,7 @@ define amdgpu_kernel void @add_inline_imm_2.0_v2f16(<2 x half> addrspace(1)* %ou
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -265,7 +274,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(<2 x half> addrspace(1)*
 
 ; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -286,7 +295,7 @@ define amdgpu_kernel void @add_inline_imm_4.0_v2f16(<2 x half> addrspace(1)* %ou
 
 ; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
@@ -325,12 +334,14 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace
 }
 
 ; GCN-LABEL: {{^}}commute_add_literal_v2f16:
+; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x00,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00]
+
 ; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
-; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0]{{$}}
+; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x00,0x09,0x00,0x08]
 ; GFX9: buffer_store_dword [[REG]]
 
-; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400{{$}}
+; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
 ; VI-DAG: buffer_load_dword
 ; VI-NOT: and
 ; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, [[K]], v{{[0-9]+}}
@@ -348,17 +359,17 @@ define amdgpu_kernel void @commute_add_literal_v2f16(<2 x half> addrspace(1)* %o
 
 ; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
 ; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
-; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1{{$}}
+; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 ; encoding
 ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
 ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
 
 ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1{{$}}
+; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1 ; encoding
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -369,18 +380,18 @@ define amdgpu_kernel void @add_inline_imm_1_v2f16(<2 x half> addrspace(1)* %out,
 
 ; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
 ; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
-; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2{{$}}
+; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 ; encoding
 ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
 ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
 
 ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2{{$}}
+; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2 ; encoding
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -391,18 +402,18 @@ define amdgpu_kernel void @add_inline_imm_2_v2f16(<2 x half> addrspace(1)* %out,
 
 ; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
 ; GFX9: s_load_dword [[VAL:s[0-9]+]]
-; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0]{{$}}
+; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
 ; GFX9: buffer_store_dword [[REG]]
 
 
 ; FIXME: Shouldn't need right shift and SDWA, also extra copy
 ; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
-; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16{{$}}
+; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 ; encoding
 ; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
 ; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]
 
 ; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16{{$}}
+; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16 ; encoding
 ; VI: v_or_b32
 ; VI: buffer_store_dword
 define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -417,7 +428,7 @@ define amdgpu_kernel void @add_inline_imm_16_v2f16(<2 x half> addrspace(1)* %out
 ; GFX9: buffer_store_dword [[REG]]
 
 ; VI: s_load_dword [[VAL:s[0-9]+]]
-; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1{{$}}
+; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1 ; encoding
 ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
 ; VI: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -434,7 +445,7 @@ define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(<2 x half> addrspace(1)* %
 ; GFX9: buffer_store_dword [[REG]]
 
 ; VI: s_load_dword [[VAL:s[0-9]+]]
-; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe{{$}}
+; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe ; encoding
 ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
 ; VI: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -452,7 +463,7 @@ define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(<2 x half> addrspace(1)* %
 
 
 ; VI: s_load_dword [[VAL:s[0-9]+]]
-; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0{{$}}
+; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0 ; encoding
 ; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
 ; VI: buffer_store_dword [[REG]]
 define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %x) #0 {
@@ -505,4 +516,86 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(<2 x half> addrspace(1)* %out
   ret void
 }
 
+; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, 0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>) ; constant is <0.5, 0.5> reinterpreted as <2 x i16>; the checks above expect it printed as 0.5
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, -0.5 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>) ; constant is <-0.5, -0.5> reinterpreted as <2 x i16>; the checks above expect it printed as -0.5
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, 1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>) ; constant is <1.0, 1.0> reinterpreted as <2 x i16>; the checks above expect it printed as 1.0
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, -1.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>) ; constant is <-1.0, -1.0> reinterpreted as <2 x i16>; the checks above expect it printed as -1.0
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
+; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) { ; packed shift-left of a constant that carries the bits of a packed half
+  %y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x ; the constant is the value being shifted and %x is the per-lane shift amount
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
+; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) { ; packed shift-left of a constant that carries the bits of a packed half
+  %y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x ; the constant is the value being shifted and %x is the per-lane shift amount
+  ret <2 x i16> %y
+}
+
+; Integer multiply by <half 4.0, half 4.0> reinterpreted as <2 x i16>; the checks below expect the constant printed as 4.0.
+; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, 4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, -4.0 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>) ; constant is <-4.0, -4.0> reinterpreted as <2 x i16>; the checks above expect it printed as -4.0
+  ret <2 x i16> %y
+}
+
+; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
+; GFX9: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0]
+
+; GFX10: v_pk_mul_lo_u16 v0, v0, 0.15915494 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
+define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) { ; integer multiply whose constant operand carries the bits of a packed half
+  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>) ; 0xH3118 in both lanes; the checks above expect this constant printed as 0.15915494
+  ret <2 x i16> %y
+}
+
 attributes #0 = { nounwind }


        


More information about the llvm-commits mailing list