[llvm] [AMDGPU] Move GCNPreRAOptimizations after MachineScheduler (PR #116211)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 14 03:13:14 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
This is in preparation for adding a new optimization to the pass that cares
about the order of instructions. The existing optimization does not
care, so this just causes minor codegen differences.
---
Patch is 20.70 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116211.diff
7 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll (+2-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll (+45-45)
- (modified) llvm/test/CodeGen/AMDGPU/offset-split-global.ll (+6-6)
- (modified) llvm/test/CodeGen/AMDGPU/roundeven.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7cfff7c2f8ac0a..41e24544778ab3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1433,7 +1433,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
if (isPassEnabled(EnablePreRAOptimizations))
- insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
+ insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
// Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
// instructions that cause scheduling barriers.
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index c0a87cf4ceacfa..e77f4f69e265bb 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -655,8 +655,8 @@
; GCN-O1-OPTS-NEXT: Register Coalescer
; GCN-O1-OPTS-NEXT: Rename Disconnected Subregister Components
; GCN-O1-OPTS-NEXT: Rewrite Partial Register Uses
-; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
; GCN-O1-OPTS-NEXT: Machine Instruction Scheduler
+; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA optimizations
; GCN-O1-OPTS-NEXT: SI Whole Quad Mode
; GCN-O1-OPTS-NEXT: SI optimize exec mask operations pre-RA
; GCN-O1-OPTS-NEXT: AMDGPU Pre-RA Long Branch Reg
@@ -968,8 +968,8 @@
; GCN-O2-NEXT: Register Coalescer
; GCN-O2-NEXT: Rename Disconnected Subregister Components
; GCN-O2-NEXT: Rewrite Partial Register Uses
-; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
; GCN-O2-NEXT: Machine Instruction Scheduler
+; GCN-O2-NEXT: AMDGPU Pre-RA optimizations
; GCN-O2-NEXT: SI Whole Quad Mode
; GCN-O2-NEXT: SI optimize exec mask operations pre-RA
; GCN-O2-NEXT: SI Form memory clauses
@@ -1295,8 +1295,8 @@
; GCN-O3-NEXT: Register Coalescer
; GCN-O3-NEXT: Rename Disconnected Subregister Components
; GCN-O3-NEXT: Rewrite Partial Register Uses
-; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
; GCN-O3-NEXT: Machine Instruction Scheduler
+; GCN-O3-NEXT: AMDGPU Pre-RA optimizations
; GCN-O3-NEXT: SI Whole Quad Mode
; GCN-O3-NEXT: SI optimize exec mask operations pre-RA
; GCN-O3-NEXT: SI Form memory clauses
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
index 2f82ceb37eb90c..7283ec88a90d83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
@@ -76,10 +76,10 @@ define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) {
; SDAG-LABEL: constant_mask_inverse_ballot:
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_mov_b32 s0, 0xf8010000
-; SDAG-NEXT: s_mov_b32 s2, 0
; SDAG-NEXT: s_mov_b32 s1, 64
-; SDAG-NEXT: v_mov_b32_e32 v3, s2
+; SDAG-NEXT: s_mov_b32 s2, 0
; SDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; SDAG-NEXT: v_mov_b32_e32 v3, s2
; SDAG-NEXT: global_store_b64 v[0:1], v[2:3], off
; SDAG-NEXT: s_endpgm
entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
index 48f6beb60f01ea..71d5747f5eeced 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
@@ -42,10 +42,10 @@ define i32 @func_rounding() {
; GFX10-LABEL: func_rounding:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
-; GFX10-NEXT: s_lshl_b32 s6, s4, 2
+; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71
; GFX10-NEXT: s_mov_b32 s5, 0xc96f385
+; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_and_b32 s4, s4, 15
; GFX10-NEXT: s_add_i32 s5, s4, 4
@@ -57,10 +57,10 @@ define i32 @func_rounding() {
; GFX11-LABEL: func_rounding:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
index d69aae0b737473..ca7f56d9ff3453 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
@@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_add_i32 s34, s4, -4
-; GFX10-NEXT: s_min_u32 s34, s4, s34
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s4, s34
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_add_i32 s0, s4, -4
-; GFX11-NEXT: s_min_u32 s0, s4, s0
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s4, s0
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -124,14 +124,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
;
; GFX10-LABEL: s_set_rounding_kernel:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
+; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
+; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
+; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: s_add_i32 s1, s0, -4
-; GFX10-NEXT: s_min_u32 s2, s0, s1
-; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX10-NEXT: s_add_i32 s3, s2, -4
+; GFX10-NEXT: s_min_u32 s2, s2, s3
; GFX10-NEXT: s_lshl_b32 s2, s2, 2
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -139,14 +139,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
;
; GFX11-LABEL: s_set_rounding_kernel:
; GFX11: ; %bb.0:
-; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
+; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
+; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s2, s0, s1
-; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_add_i32 s3, s2, -4
+; GFX11-NEXT: s_min_u32 s2, s2, s3
; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() {
; GFX10-NEXT: s_cmp_lt_u32 s4, 4
; GFX10-NEXT: s_cselect_b32 s4, s4, s5
; GFX10-NEXT: s_add_i32 s5, s4, -4
-; GFX10-NEXT: s_min_u32 s4, s4, s5
-; GFX10-NEXT: s_lshl_b32 s6, s4, 2
+; GFX10-NEXT: s_min_u32 s6, s4, s5
; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s6, s6, 2
; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() {
; GFX11-NEXT: s_cmp_lt_u32 s0, 4
; GFX11-NEXT: s_cselect_b32 s0, s0, s1
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_sext_i32_i16 s34, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_sext_i32_i16 s0, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX10-NEXT: v_readfirstlane_b32 s34, v0
; GFX10-NEXT: s_lshl_b32 s34, s34, 2
; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
; GFX11-NEXT: v_readfirstlane_b32 s0, v0
; GFX11-NEXT: s_lshl_b32 s0, s0, 2
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX10-NEXT: s_cmp_eq_u32 s4, 0
; GFX10-NEXT: s_cselect_b32 s34, 3, 5
; GFX10-NEXT: s_add_i32 s35, s34, -4
-; GFX10-NEXT: s_min_u32 s34, s34, s35
-; GFX10-NEXT: s_lshl_b32 s36, s34, 2
+; GFX10-NEXT: s_min_u32 s36, s34, s35
; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT: s_lshl_b32 s36, s36, 2
; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
; GFX11-NEXT: s_cmp_eq_u32 s4, 0
; GFX11-NEXT: s_cselect_b32 s0, 3, 5
; GFX11-NEXT: s_add_i32 s1, s0, -4
-; GFX11-NEXT: s_min_u32 s0, s0, s1
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
+; GFX11-NEXT: s_min_u32 s2, s0, s1
; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
; GFX10-LABEL: get_rounding_after_set_rounding_1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_round_mode 0x0
-; GFX10-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: s_lshl_b32 s2, s0, 2
; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
+; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
+; GFX10-NEXT: s_lshl_b32 s2, s2, 2
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_and_b32 s0, s0, 15
; GFX10-NEXT: s_add_i32 s1, s0, 4
; GFX10-NEXT: s_cmp_lt_u32 s0, 4
@@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
; GFX11-LABEL: get_rounding_after_set_rounding_1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_round_mode 0x0
-; GFX11-NEXT: v_mov_b32_e32 v0, 0
-; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX11-NEXT: s_lshl_b32 s2, s0, 2
; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
+; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
+; GFX11-NEXT: s_lshl_b32 s2, s2, 2
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
; GFX11-NEXT: s_and_b32 s0, s0, 15
; GFX11-NEXT: s_add_i32 s1, s0, 4
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index 4ebbb10fae187c..fd62ba3f9da1f8 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -3185,8 +3185,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x7ff
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3253,8 +3253,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x800
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3321,8 +3321,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0xfff
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1000
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x1fff
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3525,8 +3525,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p
; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_movk_i32 s2, 0x2000
+; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_brev_b32 s3, 1
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 0f95c0255d3abc..0aff5ca25149fc 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1043,12 +1043,12 @@ define double @v_roundeven_f64(dou...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116211
More information about the llvm-commits
mailing list