[llvm] [AMDGPU] Support image atomic no return instructions (PR #150742)
Harrison Hao via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 20 06:36:17 PDT 2025
https://github.com/harrisonGPU updated https://github.com/llvm/llvm-project/pull/150742
From eb01fe9ad983a83995424e093c478985263cb8e9 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 26 Jul 2025 05:33:34 +0000
Subject: [PATCH 1/7] [AMDGPU] Avoid setting GLC for image atomics when result
is unused
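For illustration, a minimal IR sketch of the pattern this optimizes (modeled
on the tests updated below; the function name is illustrative). With the
result unused, the selected image atomic no longer needs glc on pre-GFX12
targets or th:TH_ATOMIC_RETURN on GFX12:

declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg)

define amdgpu_ps void @atomic_swap_1d_noret(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
main_body:
  ; The atomic's result %unused is never read, so the instruction does not
  ; need to request a return value via the cache policy.
  %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}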
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 12 +++++--
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 ++--
.../llvm.amdgcn.image.atomic.dim.ll | 24 ++++++-------
.../llvm.amdgcn.image.atomic.dim.mir | 36 +++++++++++++------
.../AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll | 20 +++++------
5 files changed, 61 insertions(+), 37 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 12915c7344426..6b03923f5df61 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2099,8 +2099,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
assert((!IsTexFail || DMaskLanes >= 1) && "should have legalized this");
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
- if (BaseOpcode->Atomic)
- CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ // Keep GLC only when the atomic's result is actually used.
+ if (BaseOpcode->Atomic && VDataOut && !MRI->use_empty(VDataOut))
+ CPol |= AMDGPU::CPol::GLC;
+
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return false;
@@ -2190,7 +2192,11 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
} else {
- MIB.addDef(VDataOut); // vdata output
+ // If VDataOut is unused, mark it as dead to avoid unnecessary VGPR usage.
+ if (BaseOpcode->Atomic && MRI->use_empty(VDataOut))
+ MIB.addDef(VDataOut, RegState::Dead);
+ else
+ MIB.addDef(VDataOut);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 363717b017ef0..431e6284f3d66 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9414,8 +9414,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
unsigned CPol = Op.getConstantOperandVal(ArgOffset + Intr->CachePolicyIndex);
- if (BaseOpcode->Atomic)
- CPol |= AMDGPU::CPol::GLC; // TODO no-return optimization
+ // Keep GLC only when the atomic's result is actually used.
+ if (BaseOpcode->Atomic && !Op.getValue(0).use_empty())
+ CPol |= AMDGPU::CPol::GLC;
+
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return Op;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
index 221e2fd4f00f7..09e1fca3f2677 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -1200,7 +1200,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
-; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX6-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1213,7 +1213,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
-; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX8-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
; GFX8-NEXT: s_endpgm
;
; GFX900-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1226,7 +1226,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
-; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX900-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
; GFX900-NEXT: s_endpgm
;
; GFX90A-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1239,7 +1239,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
-; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm
; GFX90A-NEXT: s_endpgm
;
; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1252,7 +1252,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
-; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_cmpswap_i32_1d_no_return:
@@ -1265,7 +1265,7 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
-; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
%v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -3194,7 +3194,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
-; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX6-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
; GFX6-NEXT: s_endpgm
;
; GFX8-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3207,7 +3207,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
-; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX8-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
; GFX8-NEXT: s_endpgm
;
; GFX900-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3220,7 +3220,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX900-NEXT: s_mov_b32 s5, s7
; GFX900-NEXT: s_mov_b32 s6, s8
; GFX900-NEXT: s_mov_b32 s7, s9
-; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX900-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
; GFX900-NEXT: s_endpgm
;
; GFX90A-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3233,7 +3233,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX90A-NEXT: s_mov_b32 s5, s7
; GFX90A-NEXT: s_mov_b32 s6, s8
; GFX90A-NEXT: s_mov_b32 s7, s9
-; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm
; GFX90A-NEXT: s_endpgm
;
; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3246,7 +3246,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
-; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
; GFX10PLUS-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_cmpswap_i64_1d_no_return:
@@ -3259,7 +3259,7 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
-; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT: s_endpgm
main_body:
%v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
index 292fa4be1ca1d..a95b0ee434956 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -25,6 +25,7 @@ body: |
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si]].sub0
; GFX6-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
; GFX8-LABEL: name: atomic_cmpswap_i32_1d
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX8-NEXT: {{ $}}
@@ -35,6 +36,7 @@ body: |
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi]].sub0
; GFX8-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
; GFX10-LABEL: name: atomic_cmpswap_i32_1d
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -45,6 +47,7 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0
; GFX10-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
; GFX11-LABEL: name: atomic_cmpswap_i32_1d
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
@@ -55,6 +58,7 @@ body: |
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0
; GFX11-NEXT: $vgpr0 = COPY [[COPY3]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
+ ;
; GFX12-LABEL: name: atomic_cmpswap_i32_1d
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX12-NEXT: {{ $}}
@@ -89,39 +93,43 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
+ ;
; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
+ ;
; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
+ ;
; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
+ ;
; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -150,6 +158,7 @@ body: |
; GFX6-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si]].sub0_sub1
; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ ;
; GFX8-LABEL: name: atomic_cmpswap_i64_1d
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX8-NEXT: {{ $}}
@@ -160,6 +169,7 @@ body: |
; GFX8-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi]].sub0_sub1
; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ ;
; GFX10-LABEL: name: atomic_cmpswap_i64_1d
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX10-NEXT: {{ $}}
@@ -170,6 +180,7 @@ body: |
; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_]].sub0_sub1
; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ ;
; GFX11-LABEL: name: atomic_cmpswap_i64_1d
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX11-NEXT: {{ $}}
@@ -180,6 +191,7 @@ body: |
; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_]].sub0_sub1
; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1
+ ;
; GFX12-LABEL: name: atomic_cmpswap_i64_1d
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX12-NEXT: {{ $}}
@@ -214,39 +226,43 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
+ ;
; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX8: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX8-NEXT: {{ $}}
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
+ ;
; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX10: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX10-NEXT: {{ $}}
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
+ ;
; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
+ ;
; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return
; GFX12: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
index 3d1d6c87eb98d..fd436b416d9c6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
@@ -41,14 +41,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x half> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v2_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: ; return to shader part epilog
@@ -79,14 +79,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x half> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v4_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: ; return to shader part epilog
@@ -126,14 +126,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2 x bfloat> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v2_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: ; return to shader part epilog
@@ -173,14 +173,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_RETURN
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: ; return to shader part epilog
@@ -192,14 +192,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT_RETURN
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
; GFX12-GISEL-NEXT: ; return to shader part epilog
From 250cba1406445511906b4e1062e4362a77888632 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sun, 27 Jul 2025 17:08:08 +0800
Subject: [PATCH 2/7] [AMDGPU] Don't drop the register def and use
 use_nodbg_empty
---
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6b03923f5df61..9e9b9eecf93c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2100,7 +2100,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
// Keep GLC only when the atomic's result is actually used.
- if (BaseOpcode->Atomic && VDataOut && !MRI->use_empty(VDataOut))
+ if (BaseOpcode->Atomic && VDataOut && !MRI->use_nodbg_empty(VDataOut))
CPol |= AMDGPU::CPol::GLC;
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
@@ -2192,11 +2192,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
} else {
- // If VDataOut is unused, mark it as dead to avoid unnecessary VGPR usage.
- if (BaseOpcode->Atomic && MRI->use_empty(VDataOut))
- MIB.addDef(VDataOut, RegState::Dead);
- else
- MIB.addDef(VDataOut);
+ MIB.addDef(VDataOut); // vdata output
}
}
From 169ccc9a252a411504fed421903411b3000a18b0 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Tue, 9 Sep 2025 20:59:32 +0800
Subject: [PATCH 3/7] [AMDGPU] Support image atomic no return instructions
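With dedicated no-return pseudos, an atomic whose result is unused now selects
an IMAGE_ATOMIC_*_NORTN opcode that defines no vdata output at all, rather
than just clearing GLC on the returning form. A minimal IR sketch of a case
that takes the new path (mirroring the cmpswap tests below; the function name
is illustrative):

declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg)

define amdgpu_ps void @atomic_cmpswap_1d_noret(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
main_body:
  ; The unused result lets ISel pick the _NORTN pseudo, so no VGPRs are
  ; tied up holding the value the hardware would otherwise return.
  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}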
---
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 1 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 20 +-
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 257 +++++++-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 36 +-
.../llvm.amdgcn.image.atomic.dim.mir | 20 +-
.../AMDGPU/llvm.amdgcn.image.atomic.noret.ll | 581 ++++++++++++++++++
.../AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll | 24 +-
7 files changed, 898 insertions(+), 41 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 0eb00cbc2f466..21624fcda4929 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -50,6 +50,7 @@ const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr);
struct ImageDimIntrinsicInfo {
unsigned Intr;
unsigned BaseOpcode;
+ unsigned NoRetBaseOpcode;
MIMGDim Dim;
uint8_t NumOffsetArgs;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 9e9b9eecf93c8..af7414b816bc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2006,19 +2006,27 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineInstr &MI, const AMDGPU::ImageDimIntrinsicInfo *Intr) const {
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
+ unsigned IntrOpcode = Intr->BaseOpcode;
+ Register ResultDef;
+ if (MI.getNumExplicitDefs() > 0)
+ ResultDef = MI.getOperand(0).getReg();
+
+ if (MRI->use_nodbg_empty(ResultDef) && Intr->NoRetBaseOpcode != 0 &&
+ Intr->NoRetBaseOpcode != Intr->BaseOpcode)
+ IntrOpcode = Intr->NoRetBaseOpcode;
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- unsigned IntrOpcode = Intr->BaseOpcode;
const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
const bool IsGFX11Plus = AMDGPU::isGFX11Plus(STI);
const bool IsGFX12Plus = AMDGPU::isGFX12Plus(STI);
const unsigned ArgOffset = MI.getNumExplicitDefs() + 1;
- Register VDataIn, VDataOut;
+ Register VDataIn = AMDGPU::NoRegister;
+ Register VDataOut = AMDGPU::NoRegister;
LLT VDataTy;
int NumVDataDwords = -1;
bool IsD16 = MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
@@ -2049,7 +2057,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned DMaskLanes = 0;
if (BaseOpcode->Atomic) {
- VDataOut = MI.getOperand(0).getReg();
+ if (!BaseOpcode->NoReturn)
+ VDataOut = MI.getOperand(0).getReg();
VDataIn = MI.getOperand(2).getReg();
LLT Ty = MRI->getType(VDataIn);
@@ -2100,9 +2109,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned CPol = MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm();
// Keep GLC only when the atomic's result is actually used.
- if (BaseOpcode->Atomic && VDataOut && !MRI->use_nodbg_empty(VDataOut))
+ if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
CPol |= AMDGPU::CPol::GLC;
-
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return false;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 291c03ab223a8..3ed74d58eaae3 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -883,6 +883,17 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterOperand data_rc,
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
}
+class MIMG_Atomic_NoReturn_gfx6789_base <bits<8> op, string asm, RegisterOperand data_rc,
+ RegisterClass addr_rc, string dns="">
+ : MIMG_gfx6789 <op, (outs), dns> {
+ let isCodeGenOnly = 1;
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ DMask:$dmask, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
+
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
+}
+
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
RegisterClass addr_rc, string dns="">
: MIMG_gfx90a <op, (outs getAlign2RegOp<data_rc>.ret:$vdst), dns> {
@@ -895,6 +906,18 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}
+class MIMG_Atomic_NoReturn_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
+ RegisterClass addr_rc, string dns="">
+ : MIMG_gfx90a <op, (outs), dns> {
+ let isCodeGenOnly = 1;
+ let InOperandList = (ins getAlign2RegOp<data_rc>.ret:$vdata,
+ addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
+ DMask:$dmask, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, LWE:$lwe, DA:$da);
+
+ let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
+}
+
class MIMG_Atomic_si<mimgopc op, string asm, RegisterOperand data_rc,
RegisterClass addr_rc, bit enableDasm = 0>
: MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc,
@@ -902,6 +925,13 @@ class MIMG_Atomic_si<mimgopc op, string asm, RegisterOperand data_rc,
let AssemblerPredicate = isGFX6GFX7;
}
+class MIMG_Atomic_NoReturn_si<mimgopc op, string asm, RegisterOperand data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_NoReturn_gfx6789_base<op.SI, asm, data_rc, addr_rc,
+ !if(enableDasm, "GFX6GFX7", "")> {
+ let AssemblerPredicate = isGFX6GFX7;
+}
+
class MIMG_Atomic_vi<mimgopc op, string asm, RegisterOperand data_rc,
RegisterClass addr_rc, bit enableDasm = 0>
: MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
@@ -909,6 +939,13 @@ class MIMG_Atomic_vi<mimgopc op, string asm, RegisterOperand data_rc,
let MIMGEncoding = MIMGEncGfx8;
}
+class MIMG_Atomic_NoReturn_vi<mimgopc op, string asm, RegisterOperand data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_NoReturn_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
+ let AssemblerPredicate = isGFX8GFX9NotGFX90A;
+ let MIMGEncoding = MIMGEncGfx8;
+}
+
class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
RegisterClass addr_rc, bit enableDasm = 0>
: MIMG_Atomic_gfx90a_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX90A", "")> {
@@ -916,6 +953,13 @@ class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
let MIMGEncoding = MIMGEncGfx90a;
}
+class MIMG_Atomic_NoReturn_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
+ RegisterClass addr_rc, bit enableDasm = 0>
+ : MIMG_Atomic_NoReturn_gfx90a_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX90A", "")> {
+ let AssemblerPredicate = isGFX90APlus;
+ let MIMGEncoding = MIMGEncGfx90a;
+}
+
class MIMG_Atomic_gfx10<mimgopc op, string opcode,
RegisterOperand DataRC, RegisterClass AddrRC,
bit enableDisasm = 0>
@@ -929,6 +973,17 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
+class MIMG_Atomic_NoReturn_gfx10<mimgopc op, string opcode,
+ RegisterOperand DataRC, RegisterClass AddrRC,
+ bit enableDisasm = 0>
+ : MIMG_gfx10<op.GFX10M, (outs), !if(enableDisasm, "GFX10", "")> {
+ let isCodeGenOnly = 1;
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
RegisterOperand DataRC, int num_addrs,
bit enableDisasm = 0>
@@ -944,6 +999,19 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
+class MIMG_Atomic_NoReturn_nsa_gfx10<mimgopc op, string opcode,
+ RegisterOperand DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, !if(enableDisasm, "GFX10", "")> {
+ let isCodeGenOnly = 1;
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
class MIMG_Atomic_gfx11<mimgopc op, string opcode,
RegisterOperand DataRC, RegisterClass AddrRC,
bit enableDisasm = 0>
@@ -957,6 +1025,17 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
+class MIMG_Atomic_NoReturn_gfx11<mimgopc op, string opcode,
+ RegisterOperand DataRC, RegisterClass AddrRC,
+ bit enableDisasm = 0>
+ : MIMG_gfx11<op.GFX11, (outs), !if(enableDisasm, "GFX11", "")> {
+ let isCodeGenOnly = 1;
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
+ DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
RegisterOperand DataRC, int num_addrs,
bit enableDisasm = 0>
@@ -972,6 +1051,19 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
+class MIMG_Atomic_NoReturn_nsa_gfx11<mimgopc op, string opcode,
+ RegisterOperand DataRC, int num_addrs,
+ bit enableDisasm = 0>
+ : MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, !if(enableDisasm, "GFX11", "")> {
+ let isCodeGenOnly = 1;
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
+ Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
+ R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
+ let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
+}
+
class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
int num_addrs, string renamed, bit enableDisasm = 0>
: VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdst), num_addrs,
@@ -986,6 +1078,18 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
}
+class VIMAGE_Atomic_NoReturn_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
+ int num_addrs, string renamed, bit enableDisasm = 0>
+ : VIMAGE_gfx12<op.GFX12, (outs), num_addrs, !if(enableDisasm, "GFX12", "")> {
+ let isCodeGenOnly = 1;
+ let InOperandList = !con((ins DataRC:$vdata),
+ AddrIns,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
+ CPol_0:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
+ let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
+ ", $rsrc$dmask$dim$cpol$r128$a16$tfe";
+}
+
multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
RegisterOperand data_rc,
bit enableDasm = 0,
@@ -1091,6 +1195,136 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
}
}
+multiclass MIMG_Atomic_NoReturn_Addr_Helper_m <mimgopc op, string asm,
+ RegisterOperand data_rc,
+ bit enableDasm = 0,
+ bit isFP = 0,
+ string renamed = ""> {
+ let hasSideEffects = 1,
+ mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
+ FPAtomic = isFP in {
+ let VAddrDwords = 1 in {
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V1_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_VI then {
+ def _V1_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ let hasPostISelHook = 1 in
+ def _V1_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_GFX10M then {
+ def _V1_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ if op.HAS_GFX11 then {
+ def _V1_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+ }
+ }
+ if op.HAS_GFX12 then {
+ def _V1_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 1, renamed>;
+ }
+ }
+ let VAddrDwords = 2 in {
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V2_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_64, 0>;
+ }
+ if op.HAS_VI then {
+ def _V2_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_64, 0>;
+ def _V2_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_64, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V2_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+ }
+ if op.HAS_GFX11 then {
+ def _V2_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_64, 0>;
+ def _V2_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 2, 0>;
+ }
+ }
+ if op.HAS_GFX12 then {
+ def _V2_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 2, renamed>;
+ }
+ }
+ let VAddrDwords = 3 in {
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V3_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_96, 0>;
+ }
+ if op.HAS_VI then {
+ def _V3_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_96, 0>;
+ def _V3_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_96, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V3_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ }
+ if op.HAS_GFX11 then {
+ def _V3_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_96, 0>;
+ def _V3_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 3, 0>;
+ }
+ }
+ if op.HAS_GFX12 then {
+ def _V3_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 3, renamed>;
+ }
+ }
+ let VAddrDwords = 4 in {
+ let ssamp = 0 in {
+ if op.HAS_SI then {
+ def _V4_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_128, 0>;
+ }
+ if op.HAS_VI then {
+ def _V4_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_128, 0>;
+ def _V4_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_128, 0>;
+ }
+ if op.HAS_GFX10M then {
+ def _V4_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
+ }
+ if op.HAS_GFX11 then {
+ def _V4_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_128, 0>;
+ def _V4_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+ }
+ }
+ if op.HAS_GFX12 then {
+ def _V4_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 4, renamed, enableDasm>;
+ }
+ }
+ }
+}
+
+multiclass MIMG_Atomic_NoReturn <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
+ string renamed = ""> {
+ def "" : MIMGBaseOpcode {
+ let Atomic = 1;
+ let AtomicX2 = isCmpSwap;
+ let IsAtomicRet = 0;
+ let NoReturn = 1;
+ }
+
+ let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
+ // _V* variants have different vdata size, but the size is encoded implicitly,
+ // using dmask and tfe. Only 32-bit variant is registered with disassembler.
+ // Other variants are reconstructed by disassembler using dmask and tfe.
+ if !not(isCmpSwap) then {
+ let VDataDwords = 1 in
+ defm _V1 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_32, 1, isFP, renamed>;
+ }
+
+ let VDataDwords = 2 in
+ defm _V2 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_64, isCmpSwap, isFP, renamed>;
+ let VDataDwords = 3 in
+ defm _V3 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_96, 0, isFP, renamed>;
+
+ if isCmpSwap then {
+ let VDataDwords = 4 in
+ defm _V4 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, renamed>;
+ let VDataDwords = 5 in
+ defm _V5 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, renamed>;
+ }
+ }
+}
+
multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
string renamed = ""> { // 64-bit atomics
let IsAtomicRet = 1 in {
@@ -1121,6 +1355,8 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
}
}
} // End IsAtomicRet = 1
+
+ defm "_NORTN" : MIMG_Atomic_NoReturn <op, asm, isCmpSwap, isFP, renamed>;
}
multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
@@ -1815,6 +2051,7 @@ let SubtargetPredicate = isGFX12Plus in {
class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
Intrinsic Intr = I;
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
+ MIMGBaseOpcode NoRetBaseOpcode = BaseOpcode;
AMDGPUDimProps Dim = I.P.Dim;
AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
@@ -1850,13 +2087,20 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
}
+class ImageDimAtomicIntrinsicInfo<AMDGPUImageDimIntrinsic I>
+ : ImageDimIntrinsicInfo<I> {
+ MIMGBaseOpcode NoRetBaseOpcode =
+ !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod, "_NORTN"));
+}
+
def ImageDimIntrinsicTable : GenericTable {
let FilterClass = "ImageDimIntrinsicInfo";
- let Fields = ["Intr", "BaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
- "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
- "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
+ let Fields = ["Intr", "BaseOpcode", "NoRetBaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData",
+ "NumVAddrs", "NumArgs", "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex",
+ "VAddrEnd", "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
"BiasTyArg", "GradientTyArg", "CoordTyArg"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
+ string TypeOf_NoRetBaseOpcode = "MIMGBaseOpcode";
string TypeOf_Dim = "MIMGDim";
let PrimaryKey = ["Intr"];
@@ -1869,11 +2113,14 @@ def getImageDimIntrinsicByBaseOpcode : SearchIndex {
let Key = ["BaseOpcode", "Dim"];
}
-foreach intr = !listconcat(AMDGPUImageDimIntrinsics,
- AMDGPUImageDimAtomicIntrinsics) in {
+foreach intr = AMDGPUImageDimIntrinsics in {
def : ImageDimIntrinsicInfo<intr>;
}
+foreach intr = AMDGPUImageDimAtomicIntrinsics in {
+ def : ImageDimAtomicIntrinsicInfo<intr>;
+}
+
// L to LZ Optimization Mapping
def : MIMGLZMapping<IMAGE_SAMPLE_L, IMAGE_SAMPLE_LZ>;
def : MIMGLZMapping<IMAGE_SAMPLE_C_L, IMAGE_SAMPLE_C_LZ>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 431e6284f3d66..3a37206948f63 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9149,16 +9149,24 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SDLoc DL(Op);
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
+ unsigned IntrOpcode = Intr->BaseOpcode;
+ if (!Op.getNode()->hasAnyUseOfValue(0)) {
+ if (Intr->NoRetBaseOpcode != 0 && Intr->NoRetBaseOpcode != Intr->BaseOpcode)
+ IntrOpcode = Intr->NoRetBaseOpcode;
+ }
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
- unsigned IntrOpcode = Intr->BaseOpcode;
bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
+ if (BaseOpcode->NoReturn && BaseOpcode->Atomic) {
+ ResultTypes.clear();
+ ResultTypes.push_back(MVT::Other);
+ }
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
@@ -9178,7 +9186,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
IsAtomicPacked16Bit =
(Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
- Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+ Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
+ Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
+ Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
bool Is64Bit = VData.getValueSizeInBits() == 64;
if (BaseOpcode->AtomicX2) {
@@ -9188,7 +9198,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
if (Is64Bit)
VData = DAG.getBitcast(MVT::v4i32, VData);
- ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+ if (!BaseOpcode->NoReturn)
+ ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32;
+
DMask = Is64Bit ? 0xf : 0x3;
NumVDataDwords = Is64Bit ? 4 : 2;
} else {
@@ -9415,9 +9427,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
unsigned CPol = Op.getConstantOperandVal(ArgOffset + Intr->CachePolicyIndex);
// Keep GLC only when the atomic's result is actually used.
- if (BaseOpcode->Atomic && !Op.getValue(0).use_empty())
+ if (BaseOpcode->Atomic && !BaseOpcode->NoReturn)
CPol |= AMDGPU::CPol::GLC;
-
if (CPol & ~((IsGFX12Plus ? AMDGPU::CPol::ALL : AMDGPU::CPol::ALL_pregfx12) |
AMDGPU::CPol::VOLATILE))
return Op;
@@ -9529,13 +9540,22 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
DAG.setNodeMemRefs(NewNode, {MemRef});
}
+ if (BaseOpcode->NoReturn) {
+ if (BaseOpcode->Atomic) {
+ SmallVector<SDValue, 2> RetVals;
+ RetVals.push_back(DAG.getPOISON(OrigResultTypes[0]));
+ RetVals.push_back(SDValue(NewNode, 0));
+ return DAG.getMergeValues(RetVals, DL);
+ }
+ return SDValue(NewNode, 0);
+ }
+
if (BaseOpcode->AtomicX2) {
SmallVector<SDValue, 1> Elt;
DAG.ExtractVectorElements(SDValue(NewNode, 0), Elt, 0, 1);
return DAG.getMergeValues({Elt[0], SDValue(NewNode, 1)}, DL);
}
- if (BaseOpcode->NoReturn)
- return SDValue(NewNode, 0);
+
return constructRetValue(DAG, NewNode, OrigResultTypes, IsTexFail,
Subtarget->hasUnpackedD16VMem(), IsD16, DMaskLanes,
NumVDataDwords, IsAtomicPacked16Bit, DL);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
index a95b0ee434956..4f160b6cb4b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir
@@ -93,7 +93,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_si:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_si [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: atomic_cmpswap_i32_1d_no_return
@@ -102,7 +102,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_vi:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 3, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10-LABEL: name: atomic_cmpswap_i32_1d_no_return
@@ -111,7 +111,7 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return
@@ -120,7 +120,7 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: atomic_cmpswap_i32_1d_no_return
@@ -129,7 +129,7 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
+ ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V2_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -226,7 +226,7 @@ body: |
; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX6-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX6-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_si:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX6-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_si [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX6-NEXT: S_ENDPGM 0
;
; GFX8-LABEL: name: atomic_cmpswap_i64_1d_no_return
@@ -235,7 +235,7 @@ body: |
; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX8-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_vi:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX8-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_vi [[COPY1]], [[COPY2]], [[COPY]], 15, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX8-NEXT: S_ENDPGM 0
;
; GFX10-LABEL: name: atomic_cmpswap_i64_1d_no_return
@@ -244,7 +244,7 @@ body: |
; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX10-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX10-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return
@@ -253,7 +253,7 @@ body: |
; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX11-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX11-NEXT: S_ENDPGM 0
;
; GFX12-LABEL: name: atomic_cmpswap_i64_1d_no_return
@@ -262,7 +262,7 @@ body: |
; GFX12-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
; GFX12-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4
- ; GFX12-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
+ ; GFX12-NEXT: IMAGE_ATOMIC_CMPSWAP_NORTN_V4_V1_gfx12 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 8)
; GFX12-NEXT: S_ENDPGM 0
%0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
%1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
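
(For readers following along: the MIR checks above correspond to IR of roughly the following shape. This is an illustrative sketch, not part of the patch, and the function name is made up. Once the i64 result of the cmpswap is dead, GlobalISel selects the _NORTN opcode seen in the checks.)

define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return_sketch(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
  ; %cmp and %swap occupy v[0:3]; the old value is discarded, so no vdst is
  ; defined and GLC can be dropped on pre-GFX12 targets.
  %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}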
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
new file mode 100644
index 0000000000000..343fa071a54be
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
@@ -0,0 +1,581 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS-GISE %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX10PLUS %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISE %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
+
+define amdgpu_ps void @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64(<8 x i32> inreg %rsrc, i64 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d_i64:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d_i64:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d_i64:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_float(<8 x i32> inreg %rsrc, float %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_swap_1d_float:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_swap_1d_float:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_swap_1d_float:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_swap_1d_float:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call float @llvm.amdgcn.image.atomic.swap.1d.f32.i32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_add_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_sub_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_sub_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_sub_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_sub_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_sub_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_smin_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_smin_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_smin_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_smin_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_min_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_umin_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_umin_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_umin_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_umin_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_min_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_smax_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_smax_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_smax_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_smax_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_max_int v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_umax_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_umax_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_umax_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_umax_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_max_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_and_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_and_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_and_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_and_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_or_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_or_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_or_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_or_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_xor_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_xor_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_xor_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_xor_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_inc_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_inc_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_inc_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_inc_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_inc_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_dec_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_dec_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_dec_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_dec_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_dec_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_cmpswap_1d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_cmpswap_1d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_cmpswap_1d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_cmpswap_1d_64:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_cmpswap_1d_64:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_cmpswap_1d_64:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_cmpswap_1d_64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
+; GFX12-NEXT: s_endpgm
+ %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t) {
+; GFX10PLUS-GISE-LABEL: atomic_add_2d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_2d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_2d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_2d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %r) {
+; GFX10PLUS-GISE-LABEL: atomic_add_3d:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_3d:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_3d:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_3d:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32 %data, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %face) {
+; GFX10PLUS-GISE-LABEL: atomic_add_cube:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_cube:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_cube:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_cube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32 %data, i32 %s, i32 %t, i32 %face, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %slice) {
+; GFX10PLUS-GISE-LABEL: atomic_add_1darray:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_1darray:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_1darray:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_1darray:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32 %data, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice) {
+; GFX10PLUS-GISE-LABEL: atomic_add_2darray:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_2darray:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_2darray:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_2darray:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %fragid) {
+; GFX10PLUS-GISE-LABEL: atomic_add_2dmsaa:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_2dmsaa:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_2dmsaa:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_2dmsaa:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+; GFX10PLUS-GISE-LABEL: atomic_add_2darraymsaa:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_2darraymsaa:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v[1:4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_2darraymsaa:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_2darraymsaa:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, [v1, v2, v3, v4], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32 %data, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
+ ret void
+}
+
+define amdgpu_ps void @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
+; GFX10PLUS-GISE-LABEL: atomic_add_1d_slc:
+; GFX10PLUS-GISE: ; %bb.0:
+; GFX10PLUS-GISE-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc
+; GFX10PLUS-GISE-NEXT: s_endpgm
+;
+; GFX10PLUS-LABEL: atomic_add_1d_slc:
+; GFX10PLUS: ; %bb.0:
+; GFX10PLUS-NEXT: image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm slc
+; GFX10PLUS-NEXT: s_endpgm
+;
+; GFX12-GISE-LABEL: atomic_add_1d_slc:
+; GFX12-GISE: ; %bb.0:
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-GISE-NEXT: s_endpgm
+;
+; GFX12-LABEL: atomic_add_1d_slc:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-NEXT: s_endpgm
+ %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
+ ret void
+}
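
For contrast with the no-return tests above, a minimal returning counterpart looks like this (a sketch only, not part of the patch; the function name is made up). Because the call result is consumed, selection must keep the returning opcode, so pre-GFX12 targets still set glc and the shader waits before reading the destination VGPR:

define amdgpu_ps float @atomic_add_1d_ret(<8 x i32> inreg %rsrc, i32 %data, i32 %s) {
  ; The atomic's previous value is used below, so the no-return remap must
  ; not fire for this call.
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %f = bitcast i32 %v to float
  ret float %f
}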
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
index fd436b416d9c6..0680a54fb2e07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
@@ -42,15 +42,15 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v2_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <2 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16(<2 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -80,15 +80,15 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
; GFX12-SDAG-LABEL: atomic_pk_add_f16_1d_v4_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v4f16.v4f16(<4 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -127,15 +127,15 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v2_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <2 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2bf16.v2bf16(<2 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -174,15 +174,15 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_noret:
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -192,16 +192,16 @@ main_body:
define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
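
The same contrast applies to the packed-add forms. A returning sketch (illustrative only; the name is made up) keeps the returning opcode and the s_wait_loadcnt before the result registers are read:

define amdgpu_ps <2 x half> @atomic_pk_add_f16_1d_ret(<8 x i32> inreg %rsrc, <2 x half> %data, i32 %s) {
main_body:
  ; The packed result is returned, so the _NORTN remap does not apply.
  %v = call <2 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16(<2 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x half> %v
}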
>From 485df4e7a75eb12a89898859a711885cb8d09a37 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Thu, 18 Sep 2025 20:51:19 +0800
Subject: [PATCH 4/7] [AMDGPU] Address review comments.
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 12 +-
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 388 ++++--------------
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 17 +-
3 files changed, 93 insertions(+), 324 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index af7414b816bc1..62729b39dcdc3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2007,13 +2007,13 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
MachineBasicBlock *MBB = MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
unsigned IntrOpcode = Intr->BaseOpcode;
- Register ResultDef;
- if (MI.getNumExplicitDefs() > 0)
- ResultDef = MI.getOperand(0).getReg();
- if (MRI->use_nodbg_empty(ResultDef) && Intr->NoRetBaseOpcode != 0 &&
- Intr->NoRetBaseOpcode != Intr->BaseOpcode)
- IntrOpcode = Intr->NoRetBaseOpcode;
+  // For image atomics, use the no-return opcode if the result is unused.
+ if (Intr->NoRetBaseOpcode != 0 && Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
+ Register ResultDef = MI.getOperand(0).getReg();
+ if (MRI->use_nodbg_empty(ResultDef))
+ IntrOpcode = Intr->NoRetBaseOpcode;
+ }
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 3ed74d58eaae3..cdd1b87ade91f 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -873,124 +873,69 @@ multiclass MIMG_Store <mimgopc op, string asm, bit has_d16, bit mip = 0> {
}
class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, string dns="">
- : MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
- let Constraints = "$vdst = $vdata";
-
+ RegisterClass addr_rc, bit noRtn, string dns="">
+ : MIMG_gfx6789 <op, !if(noRtn, (outs), (outs data_rc:$vdst)), dns> {
+ let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
- let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
-}
-
-class MIMG_Atomic_NoReturn_gfx6789_base <bits<8> op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, string dns="">
- : MIMG_gfx6789 <op, (outs), dns> {
- let isCodeGenOnly = 1;
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
- DMask:$dmask, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
-
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
}
class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, string dns="">
- : MIMG_gfx90a <op, (outs getAlign2RegOp<data_rc>.ret:$vdst), dns> {
- let Constraints = "$vdst = $vdata";
-
+ RegisterClass addr_rc, bit noRtn, string dns="">
+ : MIMG_gfx90a <op, !if(noRtn, (outs), (outs getAlign2RegOp<data_rc>.ret:$vdst)), dns> {
+ let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = (ins getAlign2RegOp<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
- let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
-}
-
-class MIMG_Atomic_NoReturn_gfx90a_base <bits<8> op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, string dns="">
- : MIMG_gfx90a <op, (outs), dns> {
- let isCodeGenOnly = 1;
- let InOperandList = (ins getAlign2RegOp<data_rc>.ret:$vdata,
- addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
- DMask:$dmask, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, LWE:$lwe, DA:$da);
-
let AsmString = asm#" $vdata, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
}
class MIMG_Atomic_si<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc,
+ RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.SI, asm, data_rc, addr_rc, noRtn,
!if(enableDasm, "GFX6GFX7", "")> {
let AssemblerPredicate = isGFX6GFX7;
}
-class MIMG_Atomic_NoReturn_si<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_NoReturn_gfx6789_base<op.SI, asm, data_rc, addr_rc,
- !if(enableDasm, "GFX6GFX7", "")> {
- let AssemblerPredicate = isGFX6GFX7;
-}
-
class MIMG_Atomic_vi<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
- let AssemblerPredicate = isGFX8GFX9NotGFX90A;
- let MIMGEncoding = MIMGEncGfx8;
-}
-
-class MIMG_Atomic_NoReturn_vi<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_NoReturn_gfx6789_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX8", "")> {
+ RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+ : MIMG_Atomic_gfx6789_base<op.VI, asm, data_rc, addr_rc, noRtn, !if(enableDasm, "GFX8", "")> {
let AssemblerPredicate = isGFX8GFX9NotGFX90A;
let MIMGEncoding = MIMGEncGfx8;
}
class MIMG_Atomic_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_gfx90a_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX90A", "")> {
- let AssemblerPredicate = isGFX90APlus;
- let MIMGEncoding = MIMGEncGfx90a;
-}
-
-class MIMG_Atomic_NoReturn_gfx90a<mimgopc op, string asm, RegisterOperand data_rc,
- RegisterClass addr_rc, bit enableDasm = 0>
- : MIMG_Atomic_NoReturn_gfx90a_base<op.VI, asm, data_rc, addr_rc, !if(enableDasm, "GFX90A", "")> {
+ RegisterClass addr_rc, bit noRtn = 0, bit enableDasm = 0>
+ : MIMG_Atomic_gfx90a_base<op.VI, asm, data_rc, addr_rc, noRtn, !if(enableDasm, "GFX90A", "")> {
let AssemblerPredicate = isGFX90APlus;
let MIMGEncoding = MIMGEncGfx90a;
}
class MIMG_Atomic_gfx10<mimgopc op, string opcode,
RegisterOperand DataRC, RegisterClass AddrRC,
- bit enableDisasm = 0>
- : MIMG_gfx10<op.GFX10M, (outs DataRC:$vdst),
+ bit noRtn = 0, bit enableDisasm = 0>
+ : MIMG_gfx10<op.GFX10M, !if(noRtn, (outs), (outs DataRC:$vdst)),
!if(enableDisasm, "GFX10", "")> {
- let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
- let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
-}
-
-class MIMG_Atomic_NoReturn_gfx10<mimgopc op, string opcode,
- RegisterOperand DataRC, RegisterClass AddrRC,
- bit enableDisasm = 0>
- : MIMG_gfx10<op.GFX10M, (outs), !if(enableDisasm, "GFX10", "")> {
- let isCodeGenOnly = 1;
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
- DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
RegisterOperand DataRC, int num_addrs,
- bit enableDisasm = 0>
- : MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdst), num_addrs,
+ bit noRtn = 0, bit enableDisasm = 0>
+ : MIMG_nsa_gfx10<op.GFX10M, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
!if(enableDisasm, "GFX10", "")> {
- let Constraints = "$vdst = $vdata";
-
+ let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256_XNULL:$srsrc, DMask:$dmask,
@@ -999,50 +944,26 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
-class MIMG_Atomic_NoReturn_nsa_gfx10<mimgopc op, string opcode,
- RegisterOperand DataRC, int num_addrs,
- bit enableDisasm = 0>
- : MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, !if(enableDisasm, "GFX10", "")> {
- let isCodeGenOnly = 1;
- let InOperandList = !con((ins DataRC:$vdata),
- AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
-}
-
class MIMG_Atomic_gfx11<mimgopc op, string opcode,
RegisterOperand DataRC, RegisterClass AddrRC,
- bit enableDisasm = 0>
- : MIMG_gfx11<op.GFX11, (outs DataRC:$vdst),
+ bit noRtn = 0, bit enableDisasm = 0>
+ : MIMG_gfx11<op.GFX11, !if(noRtn, (outs), (outs DataRC:$vdst)),
!if(enableDisasm, "GFX11", "")> {
- let Constraints = "$vdst = $vdata";
-
+ let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
- let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
-}
-
-class MIMG_Atomic_NoReturn_gfx11<mimgopc op, string opcode,
- RegisterOperand DataRC, RegisterClass AddrRC,
- bit enableDisasm = 0>
- : MIMG_gfx11<op.GFX11, (outs), !if(enableDisasm, "GFX11", "")> {
- let isCodeGenOnly = 1;
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
- DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
RegisterOperand DataRC, int num_addrs,
- bit enableDisasm = 0>
- : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdst), num_addrs,
+ bit noRtn = 0, bit enableDisasm = 0>
+ : MIMG_nsa_gfx11<op.GFX11, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
!if(enableDisasm, "GFX11", "")> {
- let Constraints = "$vdst = $vdata";
-
+ let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256_XNULL:$srsrc, DMask:$dmask,
@@ -1051,25 +972,12 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
}
-class MIMG_Atomic_NoReturn_nsa_gfx11<mimgopc op, string opcode,
- RegisterOperand DataRC, int num_addrs,
- bit enableDisasm = 0>
- : MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, !if(enableDisasm, "GFX11", "")> {
- let isCodeGenOnly = 1;
- let InOperandList = !con((ins DataRC:$vdata),
- AddrIns,
- (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
- Dim:$dim, UNorm:$unorm, CPol_0:$cpol,
- R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
- let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
-}
-
class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
- int num_addrs, string renamed, bit enableDisasm = 0>
- : VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdst), num_addrs,
+ int num_addrs, string renamed, bit noRtn = 0, bit enableDisasm = 0>
+ : VIMAGE_gfx12<op.GFX12, !if(noRtn, (outs), (outs DataRC:$vdst)), num_addrs,
!if(enableDisasm, "GFX12", "")> {
- let Constraints = "$vdst = $vdata";
-
+  let Constraints = !if(noRtn, "", "$vdst = $vdata");
+ let isCodeGenOnly = noRtn;
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
(ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
@@ -1078,22 +986,11 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
}
-class VIMAGE_Atomic_NoReturn_gfx12<mimgopc op, string opcode, RegisterOperand DataRC,
- int num_addrs, string renamed, bit enableDisasm = 0>
- : VIMAGE_gfx12<op.GFX12, (outs), num_addrs, !if(enableDisasm, "GFX12", "")> {
- let isCodeGenOnly = 1;
- let InOperandList = !con((ins DataRC:$vdata),
- AddrIns,
- (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
- CPol_0:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
- let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
- ", $rsrc$dmask$dim$cpol$r128$a16$tfe";
-}
-
multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
RegisterOperand data_rc,
bit enableDasm = 0,
bit isFP = 0,
+ bit noRtn = 0,
string renamed = ""> {
let hasSideEffects = 1, // FIXME: remove this
mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
@@ -1101,88 +998,88 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
let VAddrDwords = 1 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn,enableDasm>;
}
if op.HAS_VI then {
- def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
let hasPostISelHook = 1 in
- def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
}
if op.HAS_GFX10M then {
- def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
}
if op.HAS_GFX11 then {
- def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
+ def _V1_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
}
}
if op.HAS_GFX12 then {
- def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, renamed>;
+ def _V1_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 1, renamed, noRtn>;
}
}
let VAddrDwords = 2 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, 0>;
+ def _V2_si : MIMG_Atomic_si <op, asm, data_rc, VReg_64, noRtn, 0>;
}
if op.HAS_VI then {
- def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, 0>;
- def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64_Align2, 0>;
+ def _V2_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_64, noRtn, 0>;
+ def _V2_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_64_Align2, noRtn, 0>;
}
if op.HAS_GFX10M then {
- def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, 0>;
+ def _V2_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_64, noRtn, 0>;
+ def _V2_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 2, noRtn, 0>;
}
if op.HAS_GFX11 then {
- def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, 0>;
+ def _V2_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_64, noRtn, 0>;
+ def _V2_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 2, noRtn, 0>;
}
}
if op.HAS_GFX12 then {
- def _V2_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 2, renamed>;
+ def _V2_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 2, renamed, noRtn>;
}
}
let VAddrDwords = 3 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, 0>;
+ def _V3_si : MIMG_Atomic_si <op, asm, data_rc, VReg_96, noRtn, 0>;
}
if op.HAS_VI then {
- def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, 0>;
- def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96_Align2, 0>;
+ def _V3_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_96, noRtn, 0>;
+ def _V3_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_96_Align2, noRtn, 0>;
}
if op.HAS_GFX10M then {
- def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, 0>;
+ def _V3_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_96, noRtn, 0>;
+ def _V3_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 3, noRtn, 0>;
}
if op.HAS_GFX11 then {
- def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, 0>;
+ def _V3_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_96, noRtn, 0>;
+ def _V3_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 3, noRtn, 0>;
}
}
if op.HAS_GFX12 then {
- def _V3_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 3, renamed>;
+ def _V3_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 3, renamed, noRtn>;
}
}
let VAddrDwords = 4 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, 0>;
+ def _V4_si : MIMG_Atomic_si <op, asm, data_rc, VReg_128, noRtn, 0>;
}
if op.HAS_VI then {
- def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, 0>;
- def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128_Align2, 0>;
+ def _V4_vi : MIMG_Atomic_vi <op, asm, data_rc, VReg_128, noRtn, 0>;
+ def _V4_gfx90a : MIMG_Atomic_gfx90a <op, asm, data_rc, VReg_128_Align2, noRtn, 0>;
}
if op.HAS_GFX10M then {
- def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
+ def _V4_gfx10 : MIMG_Atomic_gfx10 <op, asm, data_rc, VReg_128, noRtn, 0>;
+ def _V4_nsa_gfx10 : MIMG_Atomic_nsa_gfx10 <op, asm, data_rc, 4, noRtn, enableDasm>;
}
if op.HAS_GFX11 then {
- def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
+ def _V4_gfx11 : MIMG_Atomic_gfx11 <op, asm, data_rc, VReg_128, noRtn, 0>;
+ def _V4_nsa_gfx11 : MIMG_Atomic_nsa_gfx11 <op, asm, data_rc, 4, noRtn, enableDasm>;
}
}
if op.HAS_GFX12 then {
- def _V4_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 4, renamed, enableDasm>;
+ def _V4_gfx12 : VIMAGE_Atomic_gfx12 <op, asm, data_rc, 4, renamed, noRtn, enableDasm>;
}
}
}
@@ -1195,142 +1092,13 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
}
}
-multiclass MIMG_Atomic_NoReturn_Addr_Helper_m <mimgopc op, string asm,
- RegisterOperand data_rc,
- bit enableDasm = 0,
- bit isFP = 0,
- string renamed = ""> {
- let hasSideEffects = 1,
- mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
- FPAtomic = isFP in {
- let VAddrDwords = 1 in {
- let ssamp = 0 in {
- if op.HAS_SI then {
- def _V1_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- if op.HAS_VI then {
- def _V1_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VGPR_32, enableDasm>;
- let hasPostISelHook = 1 in
- def _V1_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- if op.HAS_GFX10M then {
- def _V1_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- if op.HAS_GFX11 then {
- def _V1_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VGPR_32, enableDasm>;
- }
- }
- if op.HAS_GFX12 then {
- def _V1_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 1, renamed>;
- }
- }
- let VAddrDwords = 2 in {
- let ssamp = 0 in {
- if op.HAS_SI then {
- def _V2_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_64, 0>;
- }
- if op.HAS_VI then {
- def _V2_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_64, 0>;
- def _V2_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_64, 0>;
- }
- if op.HAS_GFX10M then {
- def _V2_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 2, 0>;
- }
- if op.HAS_GFX11 then {
- def _V2_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_64, 0>;
- def _V2_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 2, 0>;
- }
- }
- if op.HAS_GFX12 then {
- def _V2_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 2, renamed>;
- }
- }
- let VAddrDwords = 3 in {
- let ssamp = 0 in {
- if op.HAS_SI then {
- def _V3_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_96, 0>;
- }
- if op.HAS_VI then {
- def _V3_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_96, 0>;
- def _V3_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_96, 0>;
- }
- if op.HAS_GFX10M then {
- def _V3_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 3, 0>;
- }
- if op.HAS_GFX11 then {
- def _V3_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_96, 0>;
- def _V3_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 3, 0>;
- }
- }
- if op.HAS_GFX12 then {
- def _V3_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 3, renamed>;
- }
- }
- let VAddrDwords = 4 in {
- let ssamp = 0 in {
- if op.HAS_SI then {
- def _V4_si : MIMG_Atomic_NoReturn_si <op, asm, data_rc, VReg_128, 0>;
- }
- if op.HAS_VI then {
- def _V4_vi : MIMG_Atomic_NoReturn_vi <op, asm, data_rc, VReg_128, 0>;
- def _V4_gfx90a : MIMG_Atomic_NoReturn_gfx90a <op, asm, data_rc, VReg_128, 0>;
- }
- if op.HAS_GFX10M then {
- def _V4_gfx10 : MIMG_Atomic_NoReturn_gfx10 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx10 : MIMG_Atomic_NoReturn_nsa_gfx10 <op, asm, data_rc, 4, enableDasm>;
- }
- if op.HAS_GFX11 then {
- def _V4_gfx11 : MIMG_Atomic_NoReturn_gfx11 <op, asm, data_rc, VReg_128, 0>;
- def _V4_nsa_gfx11 : MIMG_Atomic_NoReturn_nsa_gfx11 <op, asm, data_rc, 4, enableDasm>;
- }
- }
- if op.HAS_GFX12 then {
- def _V4_gfx12 : VIMAGE_Atomic_NoReturn_gfx12 <op, asm, data_rc, 4, renamed, enableDasm>;
- }
- }
- }
-}
-
-multiclass MIMG_Atomic_NoReturn <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
- string renamed = ""> {
- def "" : MIMGBaseOpcode {
- let Atomic = 1;
- let AtomicX2 = isCmpSwap;
- let IsAtomicRet = 0;
- let NoReturn = 1;
- }
-
- let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
- // _V* variants have different vdata size, but the size is encoded implicitly,
- // using dmask and tfe. Only 32-bit variant is registered with disassembler.
- // Other variants are reconstructed by disassembler using dmask and tfe.
- if !not(isCmpSwap) then {
- let VDataDwords = 1 in
- defm _V1 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_32, 1, isFP, renamed>;
- }
-
- let VDataDwords = 2 in
- defm _V2 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_64, isCmpSwap, isFP, renamed>;
- let VDataDwords = 3 in
- defm _V3 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_96, 0, isFP, renamed>;
-
- if isCmpSwap then {
- let VDataDwords = 4 in
- defm _V4 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, renamed>;
- let VDataDwords = 5 in
- defm _V5 : MIMG_Atomic_NoReturn_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, renamed>;
- }
- }
-}
-
-multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
- string renamed = ""> { // 64-bit atomics
- let IsAtomicRet = 1 in {
+multiclass MIMG_Atomic_Impl <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
+ bit noRtn = 0, string renamed = ""> { // 64-bit atomics
+ let IsAtomicRet = !if(noRtn, 0, 1) in {
def "" : MIMGBaseOpcode {
let Atomic = 1;
let AtomicX2 = isCmpSwap;
+ let NoReturn = !if(noRtn, 1, 0);
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
@@ -1339,24 +1107,28 @@ multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
// Other variants are reconstructed by disassembler using dmask and tfe.
if !not(isCmpSwap) then {
let VDataDwords = 1 in
- defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_32, 1, isFP, renamed>;
+ defm _V1 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_32, 1, isFP, noRtn, renamed>;
}
let VDataDwords = 2 in
- defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_64, isCmpSwap, isFP, renamed>;
+ defm _V2 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_64, isCmpSwap, isFP, noRtn, renamed>;
let VDataDwords = 3 in
- defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_96, 0, isFP, renamed>;
+ defm _V3 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_96, 0, isFP, noRtn, renamed>;
if isCmpSwap then {
let VDataDwords = 4 in
- defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, renamed>;
+ defm _V4 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_128, 0, isFP, noRtn, renamed>;
let VDataDwords = 5 in
- defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_160, 0, isFP, renamed>;
+ defm _V5 : MIMG_Atomic_Addr_Helper_m <op, asm, AVLdSt_160, 0, isFP, noRtn, renamed>;
}
}
- } // End IsAtomicRet = 1
+ }
+}
- defm "_NORTN" : MIMG_Atomic_NoReturn <op, asm, isCmpSwap, isFP, renamed>;
+multiclass MIMG_Atomic <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
+ string renamed = ""> {
+ defm "" : MIMG_Atomic_Impl <op, asm, isCmpSwap, isFP, 0, renamed>;
+ defm "_NORTN" : MIMG_Atomic_Impl <op, asm, isCmpSwap, isFP, 1, renamed>;
}
multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 3a37206948f63..51d7d5cd01c3a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9150,10 +9150,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
unsigned IntrOpcode = Intr->BaseOpcode;
- if (!Op.getNode()->hasAnyUseOfValue(0)) {
- if (Intr->NoRetBaseOpcode != 0 && Intr->NoRetBaseOpcode != Intr->BaseOpcode)
- IntrOpcode = Intr->NoRetBaseOpcode;
- }
+ if (Intr->NoRetBaseOpcode != 0 && !Op.getNode()->hasAnyUseOfValue(0))
+ IntrOpcode = Intr->NoRetBaseOpcode;
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
@@ -9167,6 +9165,7 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
ResultTypes.clear();
ResultTypes.push_back(MVT::Other);
}
+
bool IsD16 = false;
bool IsG16 = false;
bool IsA16 = false;
@@ -9541,12 +9540,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
}
if (BaseOpcode->NoReturn) {
- if (BaseOpcode->Atomic) {
- SmallVector<SDValue, 2> RetVals;
- RetVals.push_back(DAG.getPOISON(OrigResultTypes[0]));
- RetVals.push_back(SDValue(NewNode, 0));
- return DAG.getMergeValues(RetVals, DL);
- }
+ if (BaseOpcode->Atomic)
+ return DAG.getMergeValues(
+ {DAG.getPOISON(OrigResultTypes[0]), SDValue(NewNode, 0)}, DL);
+
return SDValue(NewNode, 0);
}
>From 7905830d85cebfd455723fcb9433208df8b751c1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Fri, 19 Sep 2025 15:11:06 +0800
Subject: [PATCH 5/7] [AMDGPU] Address reviewer feedback.
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +-
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 8 +++----
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 19 ++++++++--------
.../llvm.amdgcn.image.atomic.dim.gfx90a.ll | 22 +++++--------------
.../AMDGPU/llvm.amdgcn.image.atomic.noret.ll | 4 ++--
.../AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll | 14 ++----------
6 files changed, 25 insertions(+), 44 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 62729b39dcdc3..03b40c3a0113b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2009,7 +2009,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned IntrOpcode = Intr->BaseOpcode;
// For image atomics, use the no-return opcode if the result is unused.
- if (Intr->NoRetBaseOpcode != 0 && Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
+ if (Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
Register ResultDef = MI.getOperand(0).getReg();
if (MRI->use_nodbg_empty(ResultDef))
IntrOpcode = Intr->NoRetBaseOpcode;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index cdd1b87ade91f..306855a1bdeea 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -994,11 +994,11 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
string renamed = ""> {
let hasSideEffects = 1, // FIXME: remove this
mayLoad = 1, mayStore = 1, hasPostISelHook = 0, DisableWQM = 1,
- FPAtomic = isFP in {
+ FPAtomic = isFP, IsAtomicNoRet = noRtn in {
let VAddrDwords = 1 in {
let ssamp = 0 in {
if op.HAS_SI then {
- def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn,enableDasm>;
+ def _V1_si : MIMG_Atomic_si <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
}
if op.HAS_VI then {
def _V1_vi : MIMG_Atomic_vi <op, asm, data_rc, VGPR_32, noRtn, enableDasm>;
@@ -1094,11 +1094,11 @@ multiclass MIMG_Atomic_Addr_Helper_m <mimgopc op, string asm,
multiclass MIMG_Atomic_Impl <mimgopc op, string asm, bit isCmpSwap = 0, bit isFP = 0,
bit noRtn = 0, string renamed = ""> { // 64-bit atomics
- let IsAtomicRet = !if(noRtn, 0, 1) in {
+ let IsAtomicRet = !not(noRtn) in {
def "" : MIMGBaseOpcode {
let Atomic = 1;
let AtomicX2 = isCmpSwap;
- let NoReturn = !if(noRtn, 1, 0);
+ let NoReturn = noRtn;
}
let BaseOpcode = !cast<MIMGBaseOpcode>(NAME) in {
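Replacing !if(noRtn, 0, 1) with !not(noRtn), and !if(noRtn, 1, 0) with the bit itself, is behavior-preserving; IsAtomicNoRet then surfaces as an instruction TSFlags bit that existing predicates can test. A hedged sketch, assuming the current SIInstrFlags layout (SIDefines.h) and mirroring SIInstrInfo::isAtomicNoRet:

    #include "llvm/CodeGen/MachineInstr.h"

    // Sketch only: test the IsAtomicNoRet TSFlags bit on a selected
    // machine instruction.
    static bool isAtomicNoRet(const llvm::MachineInstr &MI) {
      return MI.getDesc().TSFlags & llvm::SIInstrFlags::IsAtomicNoRet;
    }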
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 51d7d5cd01c3a..f81b0e69eb6a2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9150,7 +9150,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
unsigned IntrOpcode = Intr->BaseOpcode;
- if (Intr->NoRetBaseOpcode != 0 && !Op.getNode()->hasAnyUseOfValue(0))
+ if (Intr->NoRetBaseOpcode != Intr->BaseOpcode &&
+ !Op.getNode()->hasAnyUseOfValue(0))
IntrOpcode = Intr->NoRetBaseOpcode;
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
@@ -9159,12 +9160,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
bool IsGFX11Plus = AMDGPU::isGFX11Plus(*Subtarget);
bool IsGFX12Plus = AMDGPU::isGFX12Plus(*Subtarget);
- SmallVector<EVT, 3> ResultTypes(Op->values());
SmallVector<EVT, 3> OrigResultTypes(Op->values());
- if (BaseOpcode->NoReturn && BaseOpcode->Atomic) {
- ResultTypes.clear();
+ SmallVector<EVT, 3> ResultTypes;
+ if (BaseOpcode->NoReturn && BaseOpcode->Atomic)
ResultTypes.push_back(MVT::Other);
- }
+ else
+ ResultTypes = OrigResultTypes;
bool IsD16 = false;
bool IsG16 = false;
@@ -9184,10 +9185,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
VData = Op.getOperand(2);
IsAtomicPacked16Bit =
- (Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
- Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
- Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
- Intr->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
+ (IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
+ IntrOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
bool Is64Bit = VData.getValueSizeInBits() == 64;
if (BaseOpcode->AtomicX2) {
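Keying IsAtomicPacked16Bit on IntrOpcode rather than Intr->BaseOpcode matters here because IntrOpcode may already have been rewritten to the _NORTN sibling. A hypothetical shared predicate (not in the patch) would keep this call site and the legalizer copy in patch 7 from drifting:

    // Hypothetical helper: recognizes both the returning and the no-return
    // packed-16-bit image atomics (enumerators as used in the hunk above).
    static bool isAtomicPacked16Bit(unsigned BaseOpc) {
      switch (BaseOpc) {
      case AMDGPU::IMAGE_ATOMIC_PK_ADD_F16:
      case AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN:
      case AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16:
      case AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN:
        return true;
      default:
        return false;
      }
    }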
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
index 49607e320bd0a..83f0229aea326 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
@@ -92,8 +92,7 @@ define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v1, a0
-; GFX90A-NEXT: image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm
; GFX90A-NEXT: s_endpgm
%data = call i32 asm "; def $0", "=a"()
%unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -106,8 +105,7 @@ define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a0
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT: image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT: image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm
; GFX90A-NEXT: s_endpgm
%data = call i32 asm "; def $0", "=a"()
%unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
@@ -123,9 +121,7 @@ define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a1
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT: image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: image_atomic_cmpswap a[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX90A-NEXT: s_endpgm
%cmp = call i32 asm "; def $0", "=a"()
%swap = call i32 asm "; def $0", "=a"()
@@ -139,9 +135,7 @@ define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
-; GFX90A-NEXT: image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT: image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX90A-NEXT: s_endpgm
%data = call i64 asm "; def $0", "=a"()
%unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -154,14 +148,10 @@ define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i3
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def a[0:1]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v3, a1
-; GFX90A-NEXT: v_accvgpr_read_b32 v2, a0
; GFX90A-NEXT: ;;#ASMSTART
-; GFX90A-NEXT: ; def a[0:1]
+; GFX90A-NEXT: ; def a[2:3]
; GFX90A-NEXT: ;;#ASMEND
-; GFX90A-NEXT: v_accvgpr_read_b32 v5, a1
-; GFX90A-NEXT: v_accvgpr_read_b32 v4, a0
-; GFX90A-NEXT: image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT: image_atomic_cmpswap a[0:3], v0, s[0:7] dmask:0xf unorm
; GFX90A-NEXT: s_endpgm
%cmp = call i64 asm "; def $0", "=a"()
%swap = call i64 asm "; def $0", "=a"()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
index 343fa071a54be..6c58a1a30bd4c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.noret.ll
@@ -569,12 +569,12 @@ define amdgpu_ps void @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i32 %
;
; GFX12-GISE-LABEL: atomic_add_1d_slc:
; GFX12-GISE: ; %bb.0:
-; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-GISE-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-GISE-NEXT: s_endpgm
;
; GFX12-LABEL: atomic_add_1d_slc:
; GFX12: ; %bb.0:
-; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-NEXT: image_atomic_add_uint v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-NEXT: s_endpgm
%v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
index 0680a54fb2e07..0ba62e49cabc3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
@@ -43,14 +43,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <2 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16(<2 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -81,14 +79,12 @@ define amdgpu_ps float @atomic_pk_add_f16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_f16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_f16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v4f16.v4f16(<4 x half> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -128,14 +124,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v2_noret(<8 x i32> inreg %rsrc, <2
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v2_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <2 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2bf16.v2bf16(<2 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -175,14 +169,12 @@ define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_noret(<8 x i32> inreg %rsrc, <4
; GFX12-SDAG: ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_noret:
; GFX12-GISEL: ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
@@ -192,16 +184,14 @@ main_body:
define amdgpu_ps float @atomic_pk_add_bf16_1d_v4_nt(<8 x i32> inreg %rsrc, <4 x bfloat> %data, i32 %s) {
; GFX12-SDAG-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-SDAG: ; %bb.0: ; %main_body
-; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-SDAG-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: atomic_pk_add_bf16_1d_v4_nt:
; GFX12-GISEL: ; %bb.0: ; %main_body
-; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
+; GFX12-GISEL-NEXT: image_atomic_pk_add_bf16 v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D th:TH_ATOMIC_NT
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, 1.0
-; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: ; return to shader part epilog
main_body:
%unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
>From 8f6a51323277334273bc45b444a064457b34176b Mon Sep 17 00:00:00 2001
From: Harrison <tsworld1314 at gmail.com>
Date: Sat, 20 Sep 2025 10:03:54 +0800
Subject: [PATCH 6/7] [AMDGPU] Rename atomic no-return base opcode
---
llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 4 ++--
llvm/lib/Target/AMDGPU/MIMGInstructions.td | 8 ++++----
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +++--
4 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 21624fcda4929..529da8d28a3c1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -50,7 +50,7 @@ const D16ImageDimIntrinsic *lookupD16ImageDimIntrinsic(unsigned Intr);
struct ImageDimIntrinsicInfo {
unsigned Intr;
unsigned BaseOpcode;
- unsigned NoRetBaseOpcode;
+ unsigned AtomicNoRetBaseOpcode;
MIMGDim Dim;
uint8_t NumOffsetArgs;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 03b40c3a0113b..f217d61b00f90 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2009,10 +2009,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
unsigned IntrOpcode = Intr->BaseOpcode;
// For image atomic: use no-return opcode if result is unused.
- if (Intr->NoRetBaseOpcode != Intr->BaseOpcode) {
+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
Register ResultDef = MI.getOperand(0).getReg();
if (MRI->use_nodbg_empty(ResultDef))
- IntrOpcode = Intr->NoRetBaseOpcode;
+ IntrOpcode = Intr->AtomicNoRetBaseOpcode;
}
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 306855a1bdeea..1b3bace9786dd 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1823,7 +1823,7 @@ let SubtargetPredicate = isGFX12Plus in {
class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
Intrinsic Intr = I;
MIMGBaseOpcode BaseOpcode = !cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod));
- MIMGBaseOpcode NoRetBaseOpcode = BaseOpcode;
+ MIMGBaseOpcode AtomicNoRetBaseOpcode = BaseOpcode;
AMDGPUDimProps Dim = I.P.Dim;
AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
@@ -1861,18 +1861,18 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
class ImageDimAtomicIntrinsicInfo<AMDGPUImageDimIntrinsic I>
: ImageDimIntrinsicInfo<I> {
- MIMGBaseOpcode NoRetBaseOpcode =
+ MIMGBaseOpcode AtomicNoRetBaseOpcode =
!cast<MIMGBaseOpcode>(!strconcat("IMAGE_", I.P.OpMod, "_NORTN"));
}
def ImageDimIntrinsicTable : GenericTable {
let FilterClass = "ImageDimIntrinsicInfo";
- let Fields = ["Intr", "BaseOpcode", "NoRetBaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData",
+ let Fields = ["Intr", "BaseOpcode", "AtomicNoRetBaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData",
"NumVAddrs", "NumArgs", "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex",
"VAddrEnd", "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
"BiasTyArg", "GradientTyArg", "CoordTyArg"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
- string TypeOf_NoRetBaseOpcode = "MIMGBaseOpcode";
+ string TypeOf_AtomicNoRetBaseOpcode = "MIMGBaseOpcode";
string TypeOf_Dim = "MIMGDim";
let PrimaryKey = ["Intr"];
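The renamed AtomicNoRetBaseOpcode field flows through the generated ImageDimIntrinsicTable and is reached via the existing table lookup. An illustrative consumer, assuming the current AMDGPU::getImageDimIntrinsicInfo signature:

    static unsigned selectBaseOpcode(unsigned IntrID, bool ResultIsDead) {
      const AMDGPU::ImageDimIntrinsicInfo *Info =
          AMDGPU::getImageDimIntrinsicInfo(IntrID);
      if (!Info)
        return 0; // not an image-dim intrinsic
      // Non-atomics keep AtomicNoRetBaseOpcode == BaseOpcode, so this check
      // is a no-op for them.
      if (ResultIsDead && Info->AtomicNoRetBaseOpcode != Info->BaseOpcode)
        return Info->AtomicNoRetBaseOpcode;
      return Info->BaseOpcode;
    }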
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f81b0e69eb6a2..776a4fc18b12d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9150,9 +9150,10 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
const GCNSubtarget *ST = &MF.getSubtarget<GCNSubtarget>();
unsigned IntrOpcode = Intr->BaseOpcode;
- if (Intr->NoRetBaseOpcode != Intr->BaseOpcode &&
+ // For image atomic: use no-return opcode if result is unused.
+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode &&
!Op.getNode()->hasAnyUseOfValue(0))
- IntrOpcode = Intr->NoRetBaseOpcode;
+ IntrOpcode = Intr->AtomicNoRetBaseOpcode;
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
>From 213ff48247b154043c837942a54298116e536ce1 Mon Sep 17 00:00:00 2001
From: Harrison Hao <tsworld1314 at gmail.com>
Date: Sat, 20 Sep 2025 21:27:26 +0800
Subject: [PATCH 7/7] [AMDGPU] Select no-return image atomic opcode in the legalizer
---
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c690b2b7129b4..903e6c39805e5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6703,9 +6703,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// We are only processing the operands of d16 image operations on subtargets
// that use the unpacked register layout, or need to repack the TFE result.
+ unsigned IntrOpcode = Intr->BaseOpcode;
+ // For image atomic: use no-return opcode if result is unused.
+ if (Intr->AtomicNoRetBaseOpcode != Intr->BaseOpcode) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ Register ResultDef = MI.getOperand(0).getReg();
+ if (MRI.use_nodbg_empty(ResultDef))
+ IntrOpcode = Intr->AtomicNoRetBaseOpcode;
+ }
// TODO: Do we need to guard against already legalized intrinsics?
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
- AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
+ AMDGPU::getMIMGBaseOpcodeInfo(IntrOpcode);
MachineRegisterInfo *MRI = B.getMRI();
const LLT S32 = LLT::scalar(32);
@@ -6723,7 +6731,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
const bool IsAtomicPacked16Bit =
(BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16 ||
- BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16);
+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_F16_NORTN ||
+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16 ||
+ BaseOpcode->BaseOpcode == AMDGPU::IMAGE_ATOMIC_PK_ADD_BF16_NORTN);
// Check for 16 bit addresses and pack if true.
LLT GradTy =