[llvm] de82fde - AMDGPU/Uniformity/GlobalISel: G_AMDGPU atomics are always divergent
Mirko Brkusanin via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 18 09:24:51 PDT 2023
Author: Mirko Brkusanin
Date: 2023-08-18T18:23:40+02:00
New Revision: de82fde22d3253a6415ae55e1a75ab02785d9057
URL: https://github.com/llvm/llvm-project/commit/de82fde22d3253a6415ae55e1a75ab02785d9057
DIFF: https://github.com/llvm/llvm-project/commit/de82fde22d3253a6415ae55e1a75ab02785d9057.diff
LOG: AMDGPU/Uniformity/GlobalISel: G_AMDGPU atomics are always divergent
Patch by: Acim Maravic
Differential Revision: https://reviews.llvm.org/D157091
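
In short: the uniformity check already reported the generic atomic RMW opcodes and G_ATOMIC_CMPXCHG[_WITH_SUCCESS] as never-uniform, but the AMDGPU-specific G_AMDGPU_ATOMIC_* and G_AMDGPU_BUFFER_ATOMIC_* opcodes fell through to the default classification. This patch adds AMDGPU::isGenericAtomic() and uses it in SIInstrInfo::getGenericInstructionUniformity() so those opcodes are also classified NeverUniform, since the value an atomic RMW returns can differ per lane. The standalone sketch below illustrates the intended classification; the Opcode enum, the helper bodies, and the main() driver are simplified stand-ins for illustration only, not the LLVM sources:

// Minimal standalone sketch (not the LLVM sources). It mirrors the shape of
// the check this patch adds to SIInstrInfo::getGenericInstructionUniformity.
#include <cstdio>

enum class InstructionUniformity { Default, AlwaysUniform, NeverUniform };

// Stand-in opcodes; the real ones are unsigned enumerators such as
// AMDGPU::G_AMDGPU_ATOMIC_FMIN and AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD.
enum Opcode {
  G_LOAD,
  G_ATOMIC_CMPXCHG,
  G_ATOMIC_CMPXCHG_WITH_SUCCESS,
  G_AMDGPU_ATOMIC_FMIN,
  G_AMDGPU_BUFFER_ATOMIC_ADD,
  G_AMDGPU_BUFFER_ATOMIC_CMPSWAP,
};

// Stand-in for the new AMDGPU::isGenericAtomic helper: true for the
// target-specific G_AMDGPU_* atomic opcodes (the real helper lists them all).
static bool isGenericAtomic(Opcode Opc) {
  return Opc == G_AMDGPU_ATOMIC_FMIN || Opc == G_AMDGPU_BUFFER_ATOMIC_ADD ||
         Opc == G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
}

// Shape of the classification after this patch. The real function also checks
// SIInstrInfo::isGenericAtomicRMWOpcode for the generic G_ATOMICRMW_* opcodes,
// which is omitted here for brevity.
static InstructionUniformity getGenericInstructionUniformity(Opcode Opc) {
  if (Opc == G_ATOMIC_CMPXCHG || Opc == G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
      isGenericAtomic(Opc))
    return InstructionUniformity::NeverUniform;
  return InstructionUniformity::Default;
}

int main() {
  // Prints 1: buffer atomics are now reported as never uniform.
  std::printf("G_AMDGPU_BUFFER_ATOMIC_ADD never uniform: %d\n",
              getGenericInstructionUniformity(G_AMDGPU_BUFFER_ATOMIC_ADD) ==
                  InstructionUniformity::NeverUniform);
  return 0;
}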
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a0508b550881ca..2084e722a8888b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8662,7 +8662,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
  if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
      opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
-     opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
+     opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+     AMDGPU::isGenericAtomic(opcode)) {
    return InstructionUniformity::NeverUniform;
  }
  return InstructionUniformity::Default;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 5859f978e890e3..4838e9595d85c8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -512,6 +512,28 @@ bool isPermlane16(unsigned Opc) {
         Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11;
}

+bool isGenericAtomic(unsigned Opc) {
+  return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
+         Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
+         Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
+         Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
+}
+
bool isTrue16Inst(unsigned Opc) {
  const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
  return Info ? Info->IsTrue16 : false;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 69236fba6e4cb5..4e98e4593fc8cd 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -547,6 +547,9 @@ bool isMAC(unsigned Opc);
LLVM_READNONE
bool isPermlane16(unsigned Opc);
+LLVM_READNONE
+bool isGenericAtomic(unsigned Opc);
+
namespace VOPD {
enum Component : unsigned {
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
index a2ef66fe47a09c..6581c5c3ca9c71 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/atomics-gmir.mir
@@ -85,3 +85,76 @@ body: |
    SI_RETURN implicit $vgpr0
...
+
+---
+name: test_buffer_atomics_always_divergent
+tracksRegLiveness: true
+body: |
+  bb.1:
+    liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7
+
+    %0:_(s32) = COPY $sgpr0
+    %1:sgpr(p0) = COPY $sgpr2_sgpr3
+    %2:_(s32) = IMPLICIT_DEF
+    %3:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
+    %4:_(s32) = G_CONSTANT i32 0
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_ATOMIC_FMIN
+    %5:_(s32) = G_AMDGPU_ATOMIC_FMIN %0, %3
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_ATOMIC_FMAX
+    %6:_(s32) = G_AMDGPU_ATOMIC_FMAX %0, %3
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP
+    %7:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD
+    %8:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB
+    %9:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN
+    %10:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN
+    %11:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX
+    %12:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX
+    %13:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND
+    %14:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR
+    %15:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR
+    %16:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC
+    %17:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC
+    %18:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD
+    %19:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN
+    %20:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX
+    %21:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP
+    %22:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP %0, %4, %3, %2, %2, %2, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+
+    ; CHECK: DIVERGENT
+    ; CHECK-SAME: G_AMDGPU_ATOMIC_CMPXCHG
+    %23:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %1, %4 :: (load store seq_cst (s32), addrspace 0)
+
+...