[llvm] 89cb0ee - [AMDGPU] Move GCNPreRAOptimizations after MachineScheduler (#116211)

via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 16 01:40:50 PST 2024


Author: Jay Foad
Date: 2024-11-16T09:40:46Z
New Revision: 89cb0eefcbb6303ba6813238d5ad37b103495d11

URL: https://github.com/llvm/llvm-project/commit/89cb0eefcbb6303ba6813238d5ad37b103495d11
DIFF: https://github.com/llvm/llvm-project/commit/89cb0eefcbb6303ba6813238d5ad37b103495d11.diff

LOG: [AMDGPU] Move GCNPreRAOptimizations after MachineScheduler (#116211)

This is in preparation for adding a new optimization to the pass that
cares about the order of instructions. The existing optimization does
not care, so this just causes minor codegen differences.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
    llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
    llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
    llvm/test/CodeGen/AMDGPU/offset-split-global.ll
    llvm/test/CodeGen/AMDGPU/roundeven.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 603339e200dde9..da18f2b20f1427 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1434,7 +1434,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
     insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
 
   if (isPassEnabled(EnablePreRAOptimizations))
-    insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
+    insertPass(&MachineSchedulerID, &GCNPreRAOptimizationsID);
 
   // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
   // instructions that cause scheduling barriers.

diff  --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index c0a87cf4ceacfa..e77f4f69e265bb 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -655,8 +655,8 @@
 ; GCN-O1-OPTS-NEXT:        Register Coalescer
 ; GCN-O1-OPTS-NEXT:        Rename Disconnected Subregister Components
 ; GCN-O1-OPTS-NEXT:        Rewrite Partial Register Uses
-; GCN-O1-OPTS-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O1-OPTS-NEXT:        Machine Instruction Scheduler
+; GCN-O1-OPTS-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O1-OPTS-NEXT:        SI Whole Quad Mode
 ; GCN-O1-OPTS-NEXT:        SI optimize exec mask operations pre-RA
 ; GCN-O1-OPTS-NEXT:        AMDGPU Pre-RA Long Branch Reg
@@ -968,8 +968,8 @@
 ; GCN-O2-NEXT:        Register Coalescer
 ; GCN-O2-NEXT:        Rename Disconnected Subregister Components
 ; GCN-O2-NEXT:        Rewrite Partial Register Uses
-; GCN-O2-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O2-NEXT:        Machine Instruction Scheduler
+; GCN-O2-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O2-NEXT:        SI Whole Quad Mode
 ; GCN-O2-NEXT:        SI optimize exec mask operations pre-RA
 ; GCN-O2-NEXT:        SI Form memory clauses
@@ -1295,8 +1295,8 @@
 ; GCN-O3-NEXT:        Register Coalescer
 ; GCN-O3-NEXT:        Rename Disconnected Subregister Components
 ; GCN-O3-NEXT:        Rewrite Partial Register Uses
-; GCN-O3-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O3-NEXT:        Machine Instruction Scheduler
+; GCN-O3-NEXT:        AMDGPU Pre-RA optimizations
 ; GCN-O3-NEXT:        SI Whole Quad Mode
 ; GCN-O3-NEXT:        SI optimize exec mask operations pre-RA
 ; GCN-O3-NEXT:        SI Form memory clauses

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
index 2f82ceb37eb90c..7283ec88a90d83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
@@ -76,10 +76,10 @@ define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) {
 ; SDAG-LABEL: constant_mask_inverse_ballot:
 ; SDAG:       ; %bb.0: ; %entry
 ; SDAG-NEXT:    s_mov_b32 s0, 0xf8010000
-; SDAG-NEXT:    s_mov_b32 s2, 0
 ; SDAG-NEXT:    s_mov_b32 s1, 64
-; SDAG-NEXT:    v_mov_b32_e32 v3, s2
+; SDAG-NEXT:    s_mov_b32 s2, 0
 ; SDAG-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; SDAG-NEXT:    v_mov_b32_e32 v3, s2
 ; SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
 ; SDAG-NEXT:    s_endpgm
 entry:

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
index 48f6beb60f01ea..71d5747f5eeced 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
@@ -42,10 +42,10 @@ define i32 @func_rounding() {
 ; GFX10-LABEL: func_rounding:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT:    s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4)
-; GFX10-NEXT:    s_lshl_b32 s6, s4, 2
+; GFX10-NEXT:    s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4)
 ; GFX10-NEXT:    s_mov_b32 s4, 0xeb24da71
 ; GFX10-NEXT:    s_mov_b32 s5, 0xc96f385
+; GFX10-NEXT:    s_lshl_b32 s6, s6, 2
 ; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
 ; GFX10-NEXT:    s_and_b32 s4, s4, 15
 ; GFX10-NEXT:    s_add_i32 s5, s4, 4
@@ -57,10 +57,10 @@ define i32 @func_rounding() {
 ; GFX11-LABEL: func_rounding:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
 ; GFX11-NEXT:    s_mov_b32 s0, 0xeb24da71
 ; GFX11-NEXT:    s_mov_b32 s1, 0xc96f385
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_and_b32 s0, s0, 15
 ; GFX11-NEXT:    s_add_i32 s1, s0, 4

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
index d69aae0b737473..ca7f56d9ff3453 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll
@@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_add_i32 s34, s4, -4
-; GFX10-NEXT:    s_min_u32 s34, s4, s34
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s4, s34
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_add_i32 s0, s4, -4
-; GFX11-NEXT:    s_min_u32 s0, s4, s0
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s4, s0
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -124,14 +124,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
 ;
 ; GFX10-LABEL: s_set_rounding_kernel:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x24
+; GFX10-NEXT:    s_load_dword s2, s[4:5], 0x24
+; GFX10-NEXT:    s_mov_b32 s0, 0x1c84a50f
+; GFX10-NEXT:    s_mov_b32 s1, 0xb73e62d9
 ; GFX10-NEXT:    ;;#ASMSTART
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_add_i32 s1, s0, -4
-; GFX10-NEXT:    s_min_u32 s2, s0, s1
-; GFX10-NEXT:    s_mov_b32 s0, 0x1c84a50f
-; GFX10-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX10-NEXT:    s_add_i32 s3, s2, -4
+; GFX10-NEXT:    s_min_u32 s2, s2, s3
 ; GFX10-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -139,14 +139,14 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
 ;
 ; GFX11-LABEL: s_set_rounding_kernel:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x24
+; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x24
+; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
+; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
 ; GFX11-NEXT:    ;;#ASMSTART
 ; GFX11-NEXT:    ;;#ASMEND
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s2, s0, s1
-; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
-; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_add_i32 s3, s2, -4
+; GFX11-NEXT:    s_min_u32 s2, s2, s3
 ; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() {
 ; GFX10-NEXT:    s_cmp_lt_u32 s4, 4
 ; GFX10-NEXT:    s_cselect_b32 s4, s4, s5
 ; GFX10-NEXT:    s_add_i32 s5, s4, -4
-; GFX10-NEXT:    s_min_u32 s4, s4, s5
-; GFX10-NEXT:    s_lshl_b32 s6, s4, 2
+; GFX10-NEXT:    s_min_u32 s6, s4, s5
 ; GFX10-NEXT:    s_mov_b32 s4, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s5, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s6, s6, 2
 ; GFX10-NEXT:    s_lshr_b64 s[4:5], s[4:5], s6
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() {
 ; GFX11-NEXT:    s_cmp_lt_u32 s0, 4
 ; GFX11-NEXT:    s_cselect_b32 s0, s0, s1
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_sext_i32_i16 s34, s4
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
-; GFX10-NEXT:    s_min_u32 s34, s34, s35
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_sext_i32_i16 s0, s4
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_sext_i32_i16 s34, s4
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
-; GFX10-NEXT:    s_min_u32 s34, s34, s35
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_sext_i32_i16 s0, s4
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_and_b32 s34, 0xffff, s4
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
-; GFX10-NEXT:    s_min_u32 s34, s34, s35
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_and_b32 s0, 0xffff, s4
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX10-NEXT:    v_readfirstlane_b32 s34, v0
 ; GFX10-NEXT:    s_lshl_b32 s34, s34, 2
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
-; GFX10-NEXT:    s_min_u32 s34, s34, s35
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
 ; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX11-NEXT:    s_lshl_b32 s0, s0, 2
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
 ; GFX10-NEXT:    s_cmp_eq_u32 s4, 0
 ; GFX10-NEXT:    s_cselect_b32 s34, 3, 5
 ; GFX10-NEXT:    s_add_i32 s35, s34, -4
-; GFX10-NEXT:    s_min_u32 s34, s34, s35
-; GFX10-NEXT:    s_lshl_b32 s36, s34, 2
+; GFX10-NEXT:    s_min_u32 s36, s34, s35
 ; GFX10-NEXT:    s_mov_b32 s34, 0x1c84a50f
 ; GFX10-NEXT:    s_mov_b32 s35, 0xb73e62d9
+; GFX10-NEXT:    s_lshl_b32 s36, s36, 2
 ; GFX10-NEXT:    s_lshr_b64 s[34:35], s[34:35], s36
 ; GFX10-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
@@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
 ; GFX11-NEXT:    s_cmp_eq_u32 s4, 0
 ; GFX11-NEXT:    s_cselect_b32 s0, 3, 5
 ; GFX11-NEXT:    s_add_i32 s1, s0, -4
-; GFX11-NEXT:    s_min_u32 s0, s0, s1
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
+; GFX11-NEXT:    s_min_u32 s2, s0, s1
 ; GFX11-NEXT:    s_mov_b32 s0, 0x1c84a50f
 ; GFX11-NEXT:    s_mov_b32 s1, 0xb73e62d9
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
 ; GFX10-LABEL: get_rounding_after_set_rounding_1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_round_mode 0x0
-; GFX10-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    s_lshl_b32 s2, s0, 2
 ; GFX10-NEXT:    s_mov_b32 s0, 0xeb24da71
+; GFX10-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
 ; GFX10-NEXT:    s_mov_b32 s1, 0xc96f385
+; GFX10-NEXT:    s_lshl_b32 s2, s2, 2
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
+; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_and_b32 s0, s0, 15
 ; GFX10-NEXT:    s_add_i32 s1, s0, 4
 ; GFX10-NEXT:    s_cmp_lt_u32 s0, 4
@@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
 ; GFX11-LABEL: get_rounding_after_set_rounding_1:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_round_mode 0x0
-; GFX11-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
-; GFX11-NEXT:    s_lshl_b32 s2, s0, 2
 ; GFX11-NEXT:    s_mov_b32 s0, 0xeb24da71
+; GFX11-NEXT:    s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
 ; GFX11-NEXT:    s_mov_b32 s1, 0xc96f385
+; GFX11-NEXT:    s_lshl_b32 s2, s2, 2
+; GFX11-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX11-NEXT:    s_lshr_b64 s[0:1], s[0:1], s2
 ; GFX11-NEXT:    s_and_b32 s0, s0, 15
 ; GFX11-NEXT:    s_add_i32 s1, s0, 4

diff  --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index 4ebbb10fae187c..fd62ba3f9da1f8 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -3185,8 +3185,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x7ff
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3253,8 +3253,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x800
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3321,8 +3321,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0xfff
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3389,8 +3389,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x1000
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x1fff
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
@@ -3525,8 +3525,8 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(p
 ; GFX12-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
 ; GFX12-SDAG:       ; %bb.0:
 ; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_movk_i32 s2, 0x2000
+; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX12-SDAG-NEXT:    s_brev_b32 s3, 1
 ; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
 ; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]

diff  --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 0f95c0255d3abc..0aff5ca25149fc 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1043,12 +1043,12 @@ define double @v_roundeven_f64(double %x) {
 ; SDAG_GFX6-LABEL: v_roundeven_f64:
 ; SDAG_GFX6:       ; %bb.0:
 ; SDAG_GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SDAG_GFX6-NEXT:    s_brev_b32 s4, -2
+; SDAG_GFX6-NEXT:    s_brev_b32 s6, -2
 ; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0x43300000
-; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s4, v2, v1
+; SDAG_GFX6-NEXT:    v_bfi_b32 v3, s6, v2, v1
 ; SDAG_GFX6-NEXT:    v_mov_b32_e32 v2, 0
-; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
 ; SDAG_GFX6-NEXT:    s_mov_b32 s4, -1
+; SDAG_GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
 ; SDAG_GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
 ; SDAG_GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
 ; SDAG_GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]


        


More information about the llvm-commits mailing list