[llvm] [AMDGPU] Add GFX12 test coverage for export instructions (PR #100415)

Wed Jul 24 09:28:24 PDT 2024

https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/100415

None

>From 5f07afeed5f37c9c5a70c5316bb26a78d4be0562 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 24 Jul 2024 17:27:59 +0100
Subject: [PATCH] [AMDGPU] Add GFX12 test coverage for export instructions

---
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll   | 177 +++++++++---------
 .../CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll    |   3 +-
 .../CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll     |  67 +++++++
 3 files changed, 158 insertions(+), 89 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 67e4feed21fac..c120c58ce6f33 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -1,14 +1,15 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
 
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
 
 ; GCN-LABEL: {{^}}test_export_zeroes_f32:
-; GCN: exp mrt0 off, off, off, off{{$}}
-; GCN: exp mrt0 off, off, off, off done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, off{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, off done{{$}}
 define amdgpu_kernel void @test_export_zeroes_f32() #0 {
 
   call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
@@ -23,7 +24,7 @@ define amdgpu_kernel void @test_export_zeroes_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -34,7 +35,7 @@ define amdgpu_kernel void @test_export_en_src0_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
+; GCN: {{exp|export}} mrt0 off, [[SRC1]], off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src1_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -45,7 +46,7 @@ define amdgpu_kernel void @test_export_en_src1_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, [[SRC2]], off done{{$}}
 define amdgpu_kernel void @test_export_en_src2_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -56,7 +57,7 @@ define amdgpu_kernel void @test_export_en_src2_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src3_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -67,7 +68,7 @@ define amdgpu_kernel void @test_export_en_src3_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -78,7 +79,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
   ret void
@@ -89,8 +90,8 @@ define amdgpu_kernel void @test_export_en_src0_src2_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
-; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -102,8 +103,8 @@ define amdgpu_kernel void @test_export_en_src0_src3_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -112,8 +113,8 @@ define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_f32() #0 {
 
 ; GCN-LABEL: {{^}}test_export_mrt7_f32:
 ; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 0.5
-; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
-; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
+; GCN: {{exp|export}} mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
+; GCN: {{exp|export}} mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
 define amdgpu_kernel void @test_export_mrt7_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 7, i32 15, float 0.5, float 0.5, float 0.5, float 0.5, i1 true, i1 false)
@@ -125,8 +126,8 @@ define amdgpu_kernel void @test_export_mrt7_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_z_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 8, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -138,8 +139,8 @@ define amdgpu_kernel void @test_export_z_f32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_null_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 9, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -151,8 +152,8 @@ define amdgpu_kernel void @test_export_null_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_reserved10_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 10, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -164,8 +165,8 @@ define amdgpu_kernel void @test_export_reserved10_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_reserved11_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 11, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -177,8 +178,8 @@ define amdgpu_kernel void @test_export_reserved11_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_pos0_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -190,8 +191,8 @@ define amdgpu_kernel void @test_export_pos0_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_pos3_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 15, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -203,8 +204,8 @@ define amdgpu_kernel void @test_export_pos3_f32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_param0_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -216,8 +217,8 @@ define amdgpu_kernel void @test_export_param0_f32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_param31_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 63, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
@@ -229,10 +230,10 @@ define amdgpu_kernel void @test_export_param31_f32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2.0
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 0.5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4.0
-; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
-; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
-; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
+; PREGFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; GFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_vm_f32() #0 {
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 true)
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 true)
@@ -254,8 +255,8 @@ define amdgpu_kernel void @test_export_vm_f32() #0 {
 
 
 ; GCN-LABEL: {{^}}test_export_zeroes_i32:
-; GCN: exp mrt0 off, off, off, off{{$}}
-; GCN: exp mrt0 off, off, off, off done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, off{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, off done{{$}}
 define amdgpu_kernel void @test_export_zeroes_i32() #0 {
 
   call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false)
@@ -270,7 +271,7 @@ define amdgpu_kernel void @test_export_zeroes_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], off, off, off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 1, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -281,7 +282,7 @@ define amdgpu_kernel void @test_export_en_src0_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 off, [[SRC1]], off, off done{{$}}
+; GCN: {{exp|export}} mrt0 off, [[SRC1]], off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src1_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 2, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -292,7 +293,7 @@ define amdgpu_kernel void @test_export_en_src1_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 off, off, [[SRC2]], off done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, [[SRC2]], off done{{$}}
 define amdgpu_kernel void @test_export_en_src2_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 4, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -303,7 +304,7 @@ define amdgpu_kernel void @test_export_en_src2_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 off, off, off, [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 off, off, off, [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src3_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 8, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -314,7 +315,7 @@ define amdgpu_kernel void @test_export_en_src3_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], off, off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 3, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -325,7 +326,7 @@ define amdgpu_kernel void @test_export_en_src0_src1_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, [[SRC2]], off done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 5, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
   ret void
@@ -336,8 +337,8 @@ define amdgpu_kernel void @test_export_en_src0_src2_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
-; GCN: exp mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], off, off, [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 0, i32 9, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -349,8 +350,8 @@ define amdgpu_kernel void @test_export_en_src0_src3_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -359,8 +360,8 @@ define amdgpu_kernel void @test_export_en_src0_src1_src2_src3_i32() #0 {
 
 ; GCN-LABEL: {{^}}test_export_mrt7_i32:
 ; GCN-DAG: v_mov_b32_e32 [[VHALF:v[0-9]+]], 5
-; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
-; GCN: exp mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
+; GCN: {{exp|export}} mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]]{{$}}
+; GCN: {{exp|export}} mrt7 [[VHALF]], [[VHALF]], [[VHALF]], [[VHALF]] done{{$}}
 define amdgpu_kernel void @test_export_mrt7_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 7, i32 15, i32 5, i32 5, i32 5, i32 5, i1 true, i1 false)
@@ -372,8 +373,8 @@ define amdgpu_kernel void @test_export_mrt7_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} mrtz [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_z_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 8, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -385,8 +386,8 @@ define amdgpu_kernel void @test_export_z_i32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} null [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_null_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 9, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -398,8 +399,8 @@ define amdgpu_kernel void @test_export_null_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} invalid_target_10 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_reserved10_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 10, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -411,8 +412,8 @@ define amdgpu_kernel void @test_export_reserved10_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} invalid_target_11 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_reserved11_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 11, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -424,8 +425,8 @@ define amdgpu_kernel void @test_export_reserved11_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} pos0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_pos0_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 12, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -437,8 +438,8 @@ define amdgpu_kernel void @test_export_pos0_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GCN: exp pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; GCN: {{exp|export}} pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GCN: {{exp|export}} pos3 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_pos3_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 15, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -450,8 +451,8 @@ define amdgpu_kernel void @test_export_pos3_i32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} param0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_param0_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 32, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -463,8 +464,8 @@ define amdgpu_kernel void @test_export_param0_i32() #0 {
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; PREGFX11-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; PREGFX11: exp param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; PREGFX11: {{exp|export}} param31 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_param31_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 false)
   call void @llvm.amdgcn.exp.i32(i32 63, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 false)
@@ -476,10 +477,10 @@ define amdgpu_kernel void @test_export_param31_i32() #0 {
 ; GCN-DAG: v_mov_b32_e32 [[SRC1:v[0-9]+]], 2
 ; GCN-DAG: v_mov_b32_e32 [[SRC2:v[0-9]+]], 5
 ; GCN-DAG: v_mov_b32_e32 [[SRC3:v[0-9]+]], 4
-; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
-; PREGFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
-; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
-; GFX11: exp mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
+; PREGFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] vm{{$}}
+; PREGFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done vm{{$}}
+; GFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]]{{$}}
+; GFX11: {{exp|export}} mrt0 [[SRC0]], [[SRC1]], [[SRC2]], [[SRC3]] done{{$}}
 define amdgpu_kernel void @test_export_vm_i32() #0 {
   call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 false, i1 true)
   call void @llvm.amdgcn.exp.i32(i32 0, i32 15, i32 1, i32 2, i32 5, i32 4, i1 true, i1 true)
@@ -554,15 +555,15 @@ end:
 ; GFX8-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s0
 ; GFX8-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
 ; GFX8-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
-; GFX8: exp param0 [[Y]], [[X]], [[Z0]], [[W0]]{{$}}
-; GFX8-NEXT: exp param1 [[Y]], [[X]], [[Z1]], [[W1]] done{{$}}
+; GFX8: {{exp|export}} param0 [[Y]], [[X]], [[Z0]], [[W0]]{{$}}
+; GFX8-NEXT: {{exp|export}} param1 [[Y]], [[X]], [[Z1]], [[W1]] done{{$}}
 
 ; GFX10-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s0
 ; GFX10-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s1
 ; GFX10-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
 ; GFX10-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
-; GFX10: exp param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
-; GFX10-NEXT: exp param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
+; GFX10: {{exp|export}} param0 [[X]], [[Y]], [[Z0]], [[W0]]{{$}}
+; GFX10-NEXT: {{exp|export}} param1 [[X]], [[Y]], [[Z1]], [[W1]] done{{$}}
 define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
   %z0 = fadd float %x, %y
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %x, float %y, float %z0, float 0.0, i1 false, i1 false)
@@ -572,9 +573,9 @@ define amdgpu_kernel void @test_export_clustering(float %x, float %y) #0 {
 }
 
 ; GCN-LABEL: {{^}}test_export_pos_before_param:
-; PREGFX11: exp pos0
+; PREGFX11: {{exp|export}} pos0
 ; PREGFX11-NOT: s_waitcnt
-; PREGFX11: exp param0
+; PREGFX11: {{exp|export}} param0
 define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
   %z0 = fadd float %x, %y
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
@@ -584,9 +585,9 @@ define amdgpu_kernel void @test_export_pos_before_param(float %x, float %y) #0 {
 }
 
 ; GCN-LABEL: {{^}}test_export_pos4_before_param:
-; GFX10: exp pos4
+; GFX10: {{exp|export}} pos4
 ; GFX10-NOT: s_waitcnt
-; GFX10: exp param0
+; GFX10: {{exp|export}} param0
 define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0 {
   %z0 = fadd float %x, %y
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
@@ -596,13 +597,13 @@ define amdgpu_kernel void @test_export_pos4_before_param(float %x, float %y) #0
 }
 
 ; GCN-LABEL: {{^}}test_export_pos_before_param_ordered:
-; PREGFX11: exp pos0
-; PREGFX11: exp pos1
-; PREGFX11: exp pos2
+; PREGFX11: {{exp|export}} pos0
+; PREGFX11: {{exp|export}} pos1
+; PREGFX11: {{exp|export}} pos2
 ; PREGFX11-NOT: s_waitcnt
-; PREGFX11: exp param0
-; PREGFX11: exp param1
-; PREGFX11: exp param2
+; PREGFX11: {{exp|export}} param0
+; PREGFX11: {{exp|export}} param1
+; PREGFX11: {{exp|export}} param2
 define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float %y) #0 {
   %z0 = fadd float %x, %y
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float %z0, i1 false, i1 false)
@@ -616,9 +617,9 @@ define amdgpu_kernel void @test_export_pos_before_param_ordered(float %x, float
 }
 
 ; GCN-LABEL: {{^}}test_export_pos_before_param_across_load:
-; PREGFX11: exp pos0
-; PREGFX11-NEXT: exp param0
-; PREGFX11-NEXT: exp param1
+; PREGFX11: {{exp|export}} pos0
+; PREGFX11-NEXT: {{exp|export}} param0
+; PREGFX11-NEXT: {{exp|export}} param1
 define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0 {
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
@@ -630,9 +631,9 @@ define amdgpu_kernel void @test_export_pos_before_param_across_load(i32 %idx) #0
 ; GCN-LABEL: {{^}}test_export_across_store_load:
 ; PREGFX11: buffer_store
 ; PREGFX11: buffer_load
-; PREGFX11: exp pos0
-; PREGFX11: exp param0
-; PREGFX11: exp param1
+; PREGFX11: {{exp|export}} pos0
+; PREGFX11: {{exp|export}} param0
+; PREGFX11: {{exp|export}} param1
 define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0 {
   %data0 = alloca <4 x float>, align 8, addrspace(5)
   %data1 = alloca <4 x float>, align 8, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
index 52441bcb82f5c..429ee21598384 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
@@ -1,12 +1,13 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=NOPRIM %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
 
 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
 
 ; GCN-LABEL: {{^}}test_export_prim_i32:
 ; NOPRIM: exp invalid_target_20 v0, off, off, off done{{$}}
-; PRIM: exp prim v0, off, off, off done{{$}}
+; PRIM: {{exp|export}} prim v0, off, off, off done{{$}}
 define amdgpu_gs void @test_export_prim_i32(i32 inreg %a) #0 {
   call void @llvm.amdgcn.exp.i32(i32 20, i32 1, i32 %a, i32 undef, i32 undef, i32 undef, i1 true, i1 false)
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
index 18c711d0b2aec..c62a8882c4245 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
 
 declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32)
 declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)
@@ -13,6 +15,13 @@ define amdgpu_kernel void @undef_i32() #0 {
 ; GFX11-NEXT:    exp pos0 off, off, off, off row_en
 ; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: undef_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 m0, 0
+; GFX12-NEXT:    export pos0 off, off, off, off row_en
+; GFX12-NEXT:    export pos1 off, off, off, off done row_en
+; GFX12-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.row.i32(i32 12, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 false, i32 0)
   call void @llvm.amdgcn.exp.row.i32(i32 13, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
   ret void
@@ -25,6 +34,13 @@ define amdgpu_kernel void @undef_f32() #0 {
 ; GFX11-NEXT:    exp pos0 off, off, off, off row_en
 ; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: undef_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_mov_b32 m0, 0
+; GFX12-NEXT:    export pos0 off, off, off, off row_en
+; GFX12-NEXT:    export pos1 off, off, off, off done row_en
+; GFX12-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.row.f32(i32 12, i32 0, float undef, float undef, float undef, float undef, i1 false, i32 0)
   call void @llvm.amdgcn.exp.row.f32(i32 13, i32 0, float undef, float undef, float undef, float undef, i1 true, i32 0)
   ret void
@@ -38,6 +54,14 @@ define amdgpu_kernel void @zero_i32() #0 {
 ; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
 ; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: zero_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_mov_b32_e32 v0, 0
+; GFX12-NEXT:    s_mov_b32 m0, 0
+; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
+; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
+; GFX12-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.row.i32(i32 12, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 false, i32 0)
   call void @llvm.amdgcn.exp.row.i32(i32 13, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 true, i32 0)
   ret void
@@ -51,6 +75,14 @@ define amdgpu_kernel void @one_f32() #0 {
 ; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
 ; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: one_f32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_mov_b32_e32 v0, 1.0
+; GFX12-NEXT:    s_mov_b32 m0, 0
+; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
+; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
+; GFX12-NEXT:    s_endpgm
   call void @llvm.amdgcn.exp.row.f32(i32 12, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 false, i32 0)
   call void @llvm.amdgcn.exp.row.f32(i32 13, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 true, i32 0)
   ret void
@@ -63,6 +95,13 @@ define amdgpu_kernel void @id_i32() #0 {
 ; GFX11-NEXT:    s_mov_b32 m0, 0
 ; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: id_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT:    s_mov_b32 m0, 0
+; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
+; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
   ret void
@@ -77,6 +116,15 @@ define amdgpu_kernel void @id_arg_i32(i32 %row) #0 {
 ; GFX11-NEXT:    s_mov_b32 m0, s0
 ; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
 ; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: id_arg_i32:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_load_b32 s0, s[2:3], 0x24
+; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_mov_b32 m0, s0
+; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
+; GFX12-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 %row)
   ret void
@@ -102,6 +150,25 @@ define amdgpu_kernel void @id_row_i32() #0 {
 ; GFX11-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
 ; GFX11-GISEL-NEXT:    exp pos0 v1, off, off, off done row_en
 ; GFX11-GISEL-NEXT:    s_endpgm
+;
+; GFX12-SDAG-LABEL: id_row_i32:
+; GFX12-SDAG:       ; %bb.0:
+; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0x63
+; GFX12-SDAG-NEXT:    s_mov_b32 m0, s0
+; GFX12-SDAG-NEXT:    export pos0 v0, off, off, off done row_en
+; GFX12-SDAG-NEXT:    s_endpgm
+;
+; GFX12-GISEL-LABEL: id_row_i32:
+; GFX12-GISEL:       ; %bb.0:
+; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
+; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
+; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; GFX12-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
+; GFX12-GISEL-NEXT:    export pos0 v1, off, off, off done row_en
+; GFX12-GISEL-NEXT:    s_endpgm
   %id = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 99, i32 undef, i32 undef, i32 undef, i1 true, i32 %id)
   ret void