[llvm] AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD (PR #157845)
Petar Avramovic via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 08:21:40 PDT 2025
https://github.com/petar-avramovic updated https://github.com/llvm/llvm-project/pull/157845
From 310a7b659ffedae12fadf446860e2221e617263f Mon Sep 17 00:00:00 2001
From: Petar Avramovic <Petar.Avramovic at amd.com>
Date: Wed, 10 Sep 2025 13:04:20 +0200
Subject: [PATCH] AMDGPU/UniformityAnalysis: fix G_ZEXTLOAD and G_SEXTLOAD
Use the same rules for G_ZEXTLOAD and G_SEXTLOAD as for G_LOAD.
G_ZEXTLOAD and G_SEXTLOAD from flat (addrspace 0) and private
(addrspace 5) memory should always be divergent.
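For example, with this change both of the following loads are reported
as divergent: a flat pointer may address per-lane private (scratch)
memory, so lanes executing the load with the same address can still get
different results. (A minimal sketch; the register names are
illustrative, not taken from the test.)

  %ptr:_(p0) = G_IMPLICIT_DEF
  %a:_(s32) = G_ZEXTLOAD %ptr(p0) :: (load (s16))
  %b:_(s32) = G_SEXTLOAD %ptr(p0) :: (load (s16))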
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 15 +++++++-------
.../AMDGPU/MIR/loads-gmir.mir | 20 +++++++++++--------
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 5c958dfe6954f..398c99b3bd127 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -10281,7 +10281,7 @@ unsigned SIInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
InstructionUniformity
SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
- unsigned opcode = MI.getOpcode();
+ unsigned Opcode = MI.getOpcode();
auto HandleAddrSpaceCast = [this, &MRI](const MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
@@ -10301,7 +10301,7 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
// If the target supports globally addressable scratch, the mapping from
// scratch memory to the flat aperture changes, so an address space cast
// is no longer uniform.
- if (opcode == TargetOpcode::G_ADDRSPACE_CAST)
+ if (Opcode == TargetOpcode::G_ADDRSPACE_CAST)
return HandleAddrSpaceCast(MI);
if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
@@ -10329,7 +10329,8 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
//
// All other loads are not divergent, because if threads issue loads with the
// same arguments, they will always get the same result.
- if (opcode == AMDGPU::G_LOAD) {
+ if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
+ Opcode == AMDGPU::G_SEXTLOAD) {
if (MI.memoperands_empty())
return InstructionUniformity::NeverUniform; // conservative assumption
@@ -10343,10 +10344,10 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::Default;
}
- if (SIInstrInfo::isGenericAtomicRMWOpcode(opcode) ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
- opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
- AMDGPU::isGenericAtomic(opcode)) {
+ if (SIInstrInfo::isGenericAtomicRMWOpcode(Opcode) ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG ||
+ Opcode == AMDGPU::G_ATOMIC_CMPXCHG_WITH_SUCCESS ||
+ AMDGPU::isGenericAtomic(Opcode)) {
return InstructionUniformity::NeverUniform;
}
return InstructionUniformity::Default;
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
index cb3c2de5b8753..d799cd2057f47 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/MIR/loads-gmir.mir
@@ -46,13 +46,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
- ; CHECK-NOT: DIVERGENT
-
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%0:_(s32) = G_ZEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
- ; CHECK-NOT: DIVERGENT
-
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%2:_(s32) = G_ZEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Global load
@@ -60,7 +60,8 @@ body: |
%3:_(s32) = G_ZEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; Private load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_ZEXTLOAD
%5:_(s32) = G_ZEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
@@ -80,11 +81,13 @@ body: |
%6:_(p5) = G_IMPLICIT_DEF
; Atomic load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%0:_(s32) = G_SEXTLOAD %1(p0) :: (load seq_cst (s16) from `ptr undef`)
; flat load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%2:_(s32) = G_SEXTLOAD %1(p0) :: (load (s16) from `ptr undef`)
; Global load
@@ -92,7 +95,8 @@ body: |
%3:_(s32) = G_SEXTLOAD %4(p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1)
; Private load
- ; CHECK-NOT: DIVERGENT
+ ; CHECK: DIVERGENT
+ ; CHECK-SAME: G_SEXTLOAD
%5:_(s32) = G_SEXTLOAD %6(p5) :: (volatile load (s16) from `ptr addrspace(5) undef`, addrspace 5)
G_STORE %2(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
G_STORE %3(s32), %4(p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
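For reference, the load handling in getGenericInstructionUniformity
after this patch reads roughly as below. This is a sketch: the
memoperand address-space check is paraphrased from the surrounding
in-tree code, which the hunks above do not show in full.

  if (Opcode == AMDGPU::G_LOAD || Opcode == AMDGPU::G_ZEXTLOAD ||
      Opcode == AMDGPU::G_SEXTLOAD) {
    if (MI.memoperands_empty())
      return InstructionUniformity::NeverUniform; // conservative assumption

    // Divergent if any memoperand is in the flat or private address
    // space: private memory is per-lane, and flat may alias it.
    if (llvm::any_of(MI.memoperands(), [](const MachineMemOperand *MMO) {
          return MMO->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
                 MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS;
        }))
      return InstructionUniformity::NeverUniform;

    return InstructionUniformity::Default;
  }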