[llvm] [AMDGPU][MC] Allow 128-bit rsrc register in MIMG instructions (PR #132264)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 11:00:03 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jun Wang (jwanggit86)
<details>
<summary>Changes</summary>
The r128 field in MIMG instructions indicates that the resource register is 128-bit. However, the assembler rejects instructions with a 128-bit resource register even when r128 is present. This patch fixes that problem.
---
Patch is 151.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132264.diff
16 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+27-23)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir (+12-12)
- (modified) llvm/test/CodeGen/AMDGPU/memory_clause.mir (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/merge-image-sample.mir (+199-199)
- (modified) llvm/test/CodeGen/AMDGPU/subreg-split-live-in-error.mir (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-vmcnt-loop.mir (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/waitcnt-vmem-waw.mir (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/wqm.mir (+11-11)
- (modified) llvm/test/MC/AMDGPU/gfx8_asm_mimg.s (+39)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1b94d6c43392d..5c220f7511833 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -871,11 +871,11 @@ multiclass MIMG_Store <mimgopc op, string asm, bit has_d16, bit mip = 0> {
}
class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
- RegisterClass addr_rc, string dns="">
+ RegisterClass addr_rc, string dns="", bits<1> hasR128=false>
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, !if(hasR128, SReg_128_XNULL, SReg_256_XNULL):$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
@@ -893,18 +893,20 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}
-class MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc,
- !if(enableDasm, "GFX6GFX7", "")> {
- let AssemblerPredicate = isGFX6GFX7;
+multiclass MIMG_Atomic_si<mimgopc op, string asm, RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm = 0> {
+ let AssemblerPredicate = isGFX6GFX7 in {
+ def _r1_si : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc, !if(enableDasm, "GFX6GFX7", "")>;
+ def _r2_si : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc, "", /*hasR128*/ true>;
+ }
}
-class MIMG_Atomic_vi<mimgopc op, string asm, RegisterClass data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
- let AssemblerPredicate = isGFX8GFX9NotGFX90A;
- let MIMGEncoding = MIMGEncGfx8;
+multiclass MIMG_Atomic_vi<mimgopc op, string asm, RegisterClass data_rc, RegisterClass addr_rc, bit enableDasm = 0> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX90A, MIMGEncoding = MIMGEncGfx8 in {
+ def _r1_vi : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")>;
+ }
+ let AssemblerPredicate = isGFX8Only, MIMGEncoding = MIMGEncGfx8 in {
+ def _r2_vi : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, "", /*hasR128*/ true>;
+ }
}
class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterClass data_rc,
@@ -995,10 +997,10 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
let VAddrDwords = 1 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ defm _V1 : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
}
if op.HAS_VI then {
- def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ defm _V1 : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
let hasPostISelHook = 1 in
def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
}
@@ -1016,10 +1018,10 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
let VAddrDwords = 2 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ defm _V2 : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
}
if op.HAS_VI then {
- def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
+ defm _V2 : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64, 0>;
}
if op.HAS_GFX10M then {
@@ -1038,10 +1040,10 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
let VAddrDwords = 3 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+ defm _V3 : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
}
if op.HAS_VI then {
- def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
+ defm _V3 : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96, 0>;
}
if op.HAS_GFX10M then {
@@ -1060,10 +1062,10 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
let VAddrDwords = 4 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+ defm _V4 : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
}
if op.HAS_VI then {
- def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
+ defm _V4 : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128, 0>;
}
if op.HAS_GFX10M then {
@@ -1126,9 +1128,9 @@ multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
: MIMG_Atomic <op, asm, isCmpSwap, isFP, renamed>;
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
- RegisterClass src_rc, string dns="">
+ RegisterClass src_rc, string dns="", bits<1> hasR128=false>
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, !if(hasR128, SReg_128_XNULL, SReg_256_XNULL):$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1349,9 +1351,11 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
foreach addr = MIMG_Sampler_AddrSizes<sample, isG16>.MachineInstrs in {
let VAddrDwords = addr.NumWords in {
if op.HAS_GFX10M then {
- def _V # addr.NumWords
+ def _V # addr.NumWords # _r1
: MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass,
!if(!and(enableDisasm, addr.Disassemble), "GFX8", "")>;
+ def _V # addr.NumWords # _r2
+ : MIMG_Sampler_Helper <op, asm, dst_rc, addr.RegClass, "", /*hasR128*/ true>;
if !not(ExtendedImageInst) then
def _V # addr.NumWords # _gfx90a
: MIMG_Sampler_gfx90a <op, asm, dst_rc, addr.RegClass,
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
index 292fa4be1ca1d..36804a7c78f97 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -21,7 +21,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_r1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -31,7 +31,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_r1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
@@ -89,7 +89,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_r1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -97,7 +97,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_r1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
@@ -146,7 +146,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_r1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
@@ -156,7 +156,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_r1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
@@ -214,7 +214,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_r1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
@@ -222,7 +222,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_r1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
index 3c3c9839755a2..04537a3dab3c8 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx908.mir
@@ -92,7 +92,7 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256 = COPY %3:sgpr_256
- %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+ %4:vreg_128 = IMAGE_SAMPLE_C_CL_O_V4_V8_r1 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
index c42b570b40812..ea147a34f018a 100644
--- a/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir
@@ -94,7 +94,7 @@ body: |
%1:vgpr_32 = COPY $vgpr2
%3:sgpr_256 = IMPLICIT_DEF
%2:vreg_256_align2 = COPY %3:sgpr_256
- %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
+ %4:vreg_128_align2 = IMAGE_SAMPLE_C_CL_O_V4_V8_r1 %2, %3:sgpr_256, undef %5:sgpr_128, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 4)
GLOBAL_STORE_DWORDX4 %0, %2.sub0_sub1_sub2_sub3, 0, 0, implicit $exec
GLOBAL_STORE_DWORD %0, %1, 0, 0, implicit $exec
...
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
index ac4f83b0a01ff..10e4130ef13a1 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subranges-another-prune-error.mir
@@ -257,7 +257,7 @@ body: |
%109.sub5:sgpr_256 = COPY %108
%109.sub6:sgpr_256 = COPY %108
%109.sub7:sgpr_256 = COPY killed %108
- %110:vgpr_32 = IMAGE_SAMPLE_V1_V2 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
+ %110:vgpr_32 = IMAGE_SAMPLE_V1_V2_r1 killed %107, killed %109, undef %111:sgpr_128, 8, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from constant-pool, addrspace 4)
%112:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %110, implicit $mode, implicit $exec
%113:vgpr_32 = nofpexcept V_MUL_F32_e32 0, killed %112, implicit $mode, implicit $exec
%114:vgpr_32 = nofpexcept V_MAD_F32_e64 0, killed %113, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
index 350b2738a9cca..c1dbb8f6a9811 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir
@@ -1,7 +1,7 @@
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -o - %s | FileCheck %s
# Check that %11 and %20 have been coalesced.
-# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG:[0-9]+]]
-# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11 %[[REG]]
+# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11_r1 %[[REG:[0-9]+]]
+# CHECK: IMAGE_SAMPLE_C_D_O_V1_V11_r1 %[[REG]]
---
name: main
@@ -61,7 +61,7 @@ body: |
%11.sub6 = COPY %1
%11.sub7 = COPY %1
%11.sub8 = COPY %1
- dead %18 = IMAGE_SAMPLE_C_D_O_V1_V11 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
+ dead %18 = IMAGE_SAMPLE_C_D_O_V1_V11_r1 %11, %3, %4, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
%20.sub1 = COPY %2
%20.sub2 = COPY %2
%20.sub3 = COPY %2
@@ -70,6 +70,6 @@ body: |
%20.sub6 = COPY %2
%20.sub7 = COPY %2
%20.sub8 = COPY %2
- dead %27 = IMAGE_SAMPLE_C_D_O_V1_V11 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
+ dead %27 = IMAGE_SAMPLE_C_D_O_V1_V11_r1 %20, %5, %6, 1, 0, 0, 0, 0, 0, -1, 0, implicit $exec :: (load (s32))
...
diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
index 215200c770245..f8c163417c053 100644
--- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
+++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir
@@ -45,8 +45,8 @@ body: |
; CHECK-NEXT: bb.2:
; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
- ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_1:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8)
+ ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2_r1 undef [[DEF2]], killed renamable $sgpr36_sgpr...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/132264
More information about the llvm-commits
mailing list