[llvm] [AMDGPU][NFC] Test clean up. (PR #85775)
Pravin Jagtap via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 19 04:41:22 PDT 2024
https://github.com/pravinjagtap created https://github.com/llvm/llvm-project/pull/85775
Added common check for DPP and Iterative stategies for uniform value case since optimization applied is same.
>From 8a2ccbacd307524654f2efc695de68e82fd66dc3 Mon Sep 17 00:00:00 2001
From: Pravin Jagtap <Pravin.Jagtap at amd.com>
Date: Tue, 19 Mar 2024 07:35:27 -0400
Subject: [PATCH] [AMDGPU][NFC] Test clean up.
Added common check for DPP and Iterative stategies for uniform value
case since optimization applied is same.
---
.../AMDGPU/global_atomic_optimizer_fp_rtn.ll | 307 ++++++------------
.../global_atomics_optimizer_fp_no_rtn.ll | 259 +++++----------
2 files changed, 184 insertions(+), 382 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
index e3fada3459a07f..538ef42121b83b 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
@@ -1,71 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s
+
+; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations.
+; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
+; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-ITERATIVE: 14:
-; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP16]]
-; IR-ITERATIVE: 16:
-; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
-; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
-; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
-; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
-; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
-; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
-; IR-ITERATIVE-NEXT: br label [[TMP24]]
-; IR-ITERATIVE: 24:
-; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
-; IR-ITERATIVE-NEXT: ret float [[TMP25]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-DPP-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-DPP-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-DPP: 14:
-; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP16]]
-; IR-DPP: 16:
-; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
-; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
-; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
-; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
-; IR-DPP-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-DPP-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
-; IR-DPP-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
-; IR-DPP-NEXT: br label [[TMP24]]
-; IR-DPP: 24:
-; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
-; IR-DPP-NEXT: ret float [[TMP25]]
+; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
+; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
+; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
+; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
+; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
+; IR: 14:
+; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP16]]
+; IR: 16:
+; IR-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
+; IR-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
+; IR-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
+; IR-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
+; IR-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
+; IR-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
+; IR-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
+; IR-NEXT: br label [[TMP24]]
+; IR: 24:
+; IR-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
+; IR-NEXT: ret float [[TMP25]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
@@ -411,7 +383,6 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_str
ret float %result
}
-
define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
@@ -514,61 +485,33 @@ define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_str
}
define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-ITERATIVE: 10:
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP12]]
-; IR-ITERATIVE: 12:
-; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
-; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
-; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
-; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
-; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
-; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
-; IR-ITERATIVE-NEXT: br label [[TMP20]]
-; IR-ITERATIVE: 20:
-; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
-; IR-ITERATIVE-NEXT: ret float [[TMP21]]
-;
-; IR-DPP-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-DPP: 10:
-; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP12]]
-; IR-DPP: 12:
-; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
-; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
-; IR-DPP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
-; IR-DPP-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
-; IR-DPP-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
-; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
-; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
-; IR-DPP-NEXT: br label [[TMP20]]
-; IR-DPP: 20:
-; IR-DPP-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
-; IR-DPP-NEXT: ret float [[TMP21]]
+; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
+; IR: 10:
+; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP12]]
+; IR: 12:
+; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
+; IR-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
+; IR-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
+; IR-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
+; IR-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
+; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
+; IR-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
+; IR-NEXT: br label [[TMP20]]
+; IR: 20:
+; IR-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
+; IR-NEXT: ret float [[TMP21]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
@@ -1007,159 +950,109 @@ define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_st
ret float %result
}
-
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
-
define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
-; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
-; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-ITERATIVE-NEXT: ret float [[RESULT]]
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-DPP-NEXT: ret float [[RESULT]]
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
+; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
index f87932b9361ed9..cc7a45cbb6e374 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_optimizer_fp_no_rtn.ll
@@ -1,55 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
-; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s
+; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s
+
+; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations.
+; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
+; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-ITERATIVE: 14:
-; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP16]]
-; IR-ITERATIVE: 16:
-; IR-ITERATIVE-NEXT: br label [[TMP17]]
-; IR-ITERATIVE: 17:
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
-; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
-; IR-DPP-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
-; IR-DPP-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
-; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
-; IR-DPP: 14:
-; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP16]]
-; IR-DPP: 16:
-; IR-DPP-NEXT: br label [[TMP17]]
-; IR-DPP: 17:
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
+; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
+; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
+; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
+; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
+; IR: 14:
+; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP16]]
+; IR: 16:
+; IR-NEXT: br label [[TMP17]]
+; IR: 17:
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret void
@@ -325,7 +305,6 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_stri
ret void
}
-
define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
@@ -409,45 +388,25 @@ define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_stri
}
define amdgpu_ps void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
-; IR-ITERATIVE: 2:
-; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-ITERATIVE: 10:
-; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: br label [[TMP12]]
-; IR-ITERATIVE: 12:
-; IR-ITERATIVE-NEXT: br label [[TMP13]]
-; IR-ITERATIVE: 13:
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
-; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
-; IR-DPP: 2:
-; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
-; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
-; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
-; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
-; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
-; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
-; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
-; IR-DPP: 10:
-; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: br label [[TMP12]]
-; IR-DPP: 12:
-; IR-DPP-NEXT: br label [[TMP13]]
-; IR-DPP: 13:
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
+; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
+; IR: 2:
+; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
+; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
+; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
+; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
+; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
+; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
+; IR: 10:
+; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: br label [[TMP12]]
+; IR: 12:
+; IR-NEXT: br label [[TMP13]]
+; IR: 13:
+; IR-NEXT: ret void
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
@@ -797,159 +756,109 @@ define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_str
ret void
}
-
define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret void
}
define amdgpu_ps void @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret void
}
define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
-
define amdgpu_ps void @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
-; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
-; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
-; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret void
}
define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret void
}
define amdgpu_ps void @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
-; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
-; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-ITERATIVE-NEXT: ret void
-;
-; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
-; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
-; IR-DPP-NEXT: ret void
+; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
+; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
+; IR-NEXT: ret void
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret void
More information about the llvm-commits
mailing list