[llvm] 03d9250 - [AMDGPU] Enable Atomic Optimizer and Default to Iterative Scan Strategy.
Pravin Jagtap via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 14 22:22:34 PDT 2023
Author: Pravin Jagtap
Date: 2023-06-15T01:18:38-04:00
New Revision: 03d92501f385bac5d0b25742b9f368a90e103621
URL: https://github.com/llvm/llvm-project/commit/03d92501f385bac5d0b25742b9f368a90e103621
DIFF: https://github.com/llvm/llvm-project/commit/03d92501f385bac5d0b25742b9f368a90e103621.diff
LOG: [AMDGPU] Enable Atomic Optimizer and Default to Iterative Scan Strategy.
The D147408 implemented new Iterative approach for scan computations
and added new flag `amdgpu-atomic-optimizer-strategy` which is
defaulted to DPP.
The changeset https://github.com/GPUOpen-Drivers/llpc/pull/2506
adapts to the new changes in LLPC.
This patch enables atomic optimizer pass and selects Iterative
approach for scan computations by default for compute pipeline.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D152649
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
llvm/test/CodeGen/AMDGPU/gds-allocation.ll
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 3c46fabd310d0..c3f7d753e1979 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -278,12 +278,12 @@ static cl::opt<bool> OptVGPRLiveRange(
static cl::opt<bool>
EnableAtomicOptimizations("amdgpu-atomic-optimizations",
cl::desc("Enable atomic optimizations"),
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
"amdgpu-atomic-optimizer-strategy",
cl::desc("Select DPP or Iterative strategy for scan"),
- cl::init(ScanOptions::DPP),
+ cl::init(ScanOptions::Iterative),
cl::values(clEnumValN(ScanOptions::DPP, "DPP",
"Use DPP operations for scan"),
clEnumValN(ScanOptions::Iterative, "Iterative",
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
index 4e1728a369dc0..3f04a4b5bed14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -amdgpu-atomic-optimizations=false < %s | FileCheck -check-prefix=GFX6 %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -amdgpu-atomic-optimizations=false < %s | FileCheck -check-prefix=GFX7 %s
; Test end to end matching of addressing modes when MUBUF is used for
; global memory.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 2a41877502677..b76fdbcf47300 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-- -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX7 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX8 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX9 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s
+; RUN: llc -mtriple=amdgcn-- -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX7 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX8 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX89,GFX9 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1064 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1032 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizations=true -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s
declare i1 @llvm.amdgcn.wqm.vote(i1)
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
index 5eae0e126c08d..c005a5a58c77a 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizations=false -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s
define amdgpu_cs void @mmo_offsets0(ptr addrspace(6) inreg noalias align(16) dereferenceable(18446744073709551615) %arg0, i32 %arg1) {
; GCN-LABEL: name: mmo_offsets0
diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
index b079fe124ca74..dddf9ce597308 100644
--- a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx90a -o - %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizations=false -o - %s | FileCheck %s
%S = type <{ float, double }>
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index cef5971628939..37218dc6ccb88 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 119764df072dd..fe4d0224b038b 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -251,11 +251,14 @@
; GCN-O1-NEXT: Cycle Info Analysis
; GCN-O1-NEXT: Uniformity Analysis
; GCN-O1-NEXT: AMDGPU IR late optimizations
+; GCN-O1-NEXT: AMDGPU atomic optimizations
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-NEXT: Function Alias Analysis Results
; GCN-O1-NEXT: Natural Loop Information
; GCN-O1-NEXT: Code sinking
; GCN-O1-NEXT: Post-Dominator Tree Construction
+; GCN-O1-NEXT: Cycle Info Analysis
+; GCN-O1-NEXT: Uniformity Analysis
; GCN-O1-NEXT: Unify divergent function exit nodes
; GCN-O1-NEXT: Lazy Value Information Analysis
; GCN-O1-NEXT: Lower SwitchInst's to branches
@@ -541,11 +544,14 @@
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
; GCN-O1-OPTS-NEXT: Uniformity Analysis
; GCN-O1-OPTS-NEXT: AMDGPU IR late optimizations
+; GCN-O1-OPTS-NEXT: AMDGPU atomic optimizations
; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
; GCN-O1-OPTS-NEXT: Natural Loop Information
; GCN-O1-OPTS-NEXT: Code sinking
; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction
+; GCN-O1-OPTS-NEXT: Cycle Info Analysis
+; GCN-O1-OPTS-NEXT: Uniformity Analysis
; GCN-O1-OPTS-NEXT: Unify divergent function exit nodes
; GCN-O1-OPTS-NEXT: Lazy Value Information Analysis
; GCN-O1-OPTS-NEXT: Lower SwitchInst's to branches
@@ -847,11 +853,14 @@
; GCN-O2-NEXT: Cycle Info Analysis
; GCN-O2-NEXT: Uniformity Analysis
; GCN-O2-NEXT: AMDGPU IR late optimizations
+; GCN-O2-NEXT: AMDGPU atomic optimizations
; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: Natural Loop Information
; GCN-O2-NEXT: Code sinking
; GCN-O2-NEXT: Post-Dominator Tree Construction
+; GCN-O2-NEXT: Cycle Info Analysis
+; GCN-O2-NEXT: Uniformity Analysis
; GCN-O2-NEXT: Unify divergent function exit nodes
; GCN-O2-NEXT: Lazy Value Information Analysis
; GCN-O2-NEXT: Lower SwitchInst's to branches
@@ -1168,11 +1177,14 @@
; GCN-O3-NEXT: Cycle Info Analysis
; GCN-O3-NEXT: Uniformity Analysis
; GCN-O3-NEXT: AMDGPU IR late optimizations
+; GCN-O3-NEXT: AMDGPU atomic optimizations
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Natural Loop Information
; GCN-O3-NEXT: Code sinking
; GCN-O3-NEXT: Post-Dominator Tree Construction
+; GCN-O3-NEXT: Cycle Info Analysis
+; GCN-O3-NEXT: Uniformity Analysis
; GCN-O3-NEXT: Unify divergent function exit nodes
; GCN-O3-NEXT: Lazy Value Information Analysis
; GCN-O3-NEXT: Lower SwitchInst's to branches
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index 2d072531088ce..82515e721908f 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -march=amdgcn -mcpu=gfx900 -amdgpu-aa -amdgpu-aa-wrapper -amdgpu-annotate-uniform -S --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs --amdgpu-lower-module-lds-strategy=module < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs --amdgpu-lower-module-lds-strategy=module -amdgpu-atomic-optimizations=false < %s | FileCheck -check-prefix=GCN %s
; Check that barrier or fence in between of loads is not considered a clobber
; for the purpose of converting vector loads into scalar.
diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index 2d8680ea030ce..820b37153715d 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
define amdgpu_cs void @should_not_hoist_set_inactive(<4 x i32> inreg %i14, i32 inreg %v, i32 %lane, i32 %f, i32 %f2) #0 {
; GCN-LABEL: should_not_hoist_set_inactive:
More information about the llvm-commits
mailing list