[llvm] [DAG] isGuaranteedNotToBeUndefOrPoison - ISD::LOAD nodes are not poison if the LoadSDNode is known to be dereferenceable (PR #160884)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 26 07:02:19 PDT 2025


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/160884

>From e62c1e7884d43fde6d8b5e4916a25c027db09662 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 26 Sep 2025 14:37:06 +0100
Subject: [PATCH 1/2] [DAG] isGuaranteedNotToBeUndefOrPoison - ISD::LOAD nodes
 are not poison if the LoadSDNode is known to be dereferenceable

Matches the behaviour in ValueTracking.cpp

Frozen ISD::LOAD nodes do become an issue when we more aggressively push freeze through a DAG - so we need to find more cases where we can safely unfreeze loads (e.g. constant pool)?
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  3 +
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      |  8 +--
 .../AMDGPU/divergence-driven-trunc-to-i1.ll   | 41 ++++++-------
 llvm/test/CodeGen/AMDGPU/sra.ll               | 60 +++++++++----------
 llvm/test/CodeGen/AMDGPU/srem64.ll            | 16 ++---
 llvm/test/CodeGen/X86/oddsubvector.ll         |  8 +--
 llvm/test/CodeGen/X86/pr38539.ll              | 44 +++++++-------
 7 files changed, 89 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7aa293af963e6..58dee9494523d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5631,6 +5631,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
            });
   }
 
+  case ISD::LOAD:
+    return cast<LoadSDNode>(Op)->isDereferenceable();
+
     // TODO: Search for noundef attributes from library functions.
 
     // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index b69afb8e301bb..6eabcdebcfeec 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7791,7 +7791,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ;
 ; GFX6-LABEL: sdiv_i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0xd
 ; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -7960,7 +7960,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ;
 ; GFX9-LABEL: sdiv_i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x34
+; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b64 s[0:1], 0x1000, s0
 ; GFX9-NEXT:    s_ashr_i32 s6, s1, 31
@@ -9059,7 +9059,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ;
 ; GFX6-LABEL: srem_i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX6-NEXT:    s_load_dword s0, s[4:5], 0xd
 ; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x9
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b64 s[0:1], 0x1000, s0
@@ -9230,7 +9230,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ;
 ; GFX9-LABEL: srem_i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x34
+; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x34
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshl_b64 s[0:1], 0x1000, s0
 ; GFX9-NEXT:    s_ashr_i32 s2, s1, 31
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
index 3303cb86c874e..4adc376f74407 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
@@ -15,16 +15,13 @@ define amdgpu_kernel void @uniform_trunc_i16_to_i1(ptr addrspace(1) %out, i16 %x
   ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
   ; GCN-NEXT:   [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_LOAD_DWORD_IMM]]
-  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 16
-  ; GCN-NEXT:   [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[S_LOAD_DWORD_IMM]], killed [[S_MOV_B32_2]], implicit-def dead $scc
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LSHR_B32_]]
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY3]], implicit-def dead $scc
-  ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 65536, [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
+  ; GCN-NEXT:   S_CMP_LG_U32 killed [[S_AND_B32_]], 0, implicit-def $scc
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_2]], implicit-def $scc
   ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN-NEXT:   S_CMP_LT_I32 killed [[S_SEXT_I32_I16_]], killed [[S_MOV_B32_3]], implicit-def $scc
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY5]], killed [[COPY4]], implicit-def dead $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY4]], killed [[COPY3]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -68,16 +65,15 @@ define amdgpu_kernel void @uniform_trunc_i32_to_i1(ptr addrspace(1) %out, i32 %x
   ; GCN-NEXT:   [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 61440
   ; GCN-NEXT:   [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
   ; GCN-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY2]], %subreg.sub0, killed [[COPY1]], %subreg.sub1, killed [[S_MOV_B32_1]], %subreg.sub2, killed [[S_MOV_B32_]], %subreg.sub3
-  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY killed [[S_LOAD_DWORDX2_IMM1]]
-  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub0
-  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY3]].sub1
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY5]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub0
+  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM1]].sub1
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY4]], implicit-def dead $scc
   ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
-  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_64 = COPY $scc
   ; GCN-NEXT:   [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-  ; GCN-NEXT:   S_CMP_LT_I32 killed [[COPY4]], killed [[S_MOV_B32_2]], implicit-def $scc
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY7]], killed [[COPY6]], implicit-def dead $scc
+  ; GCN-NEXT:   S_CMP_LT_I32 killed [[COPY3]], killed [[S_MOV_B32_2]], implicit-def $scc
+  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[COPY6]], killed [[COPY5]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.out.load, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
@@ -126,14 +122,13 @@ define amdgpu_kernel void @uniform_trunc_i64_to_i1(ptr addrspace(1) %out, i64 %x
   ; GCN-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub3
   ; GCN-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
   ; GCN-NEXT:   [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY6]], %subreg.sub0, killed [[COPY5]], %subreg.sub1
-  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]]
-  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY7]], implicit-def dead $scc
+  ; GCN-NEXT:   [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[S_LOAD_DWORD_IMM]], implicit-def dead $scc
   ; GCN-NEXT:   S_CMP_EQ_U32 killed [[S_AND_B32_]], 1, implicit-def $scc
-  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY $scc
+  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:sreg_64 = COPY $scc
   ; GCN-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
-  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
-  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY8]], implicit-def dead $scc
+  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:vreg_64 = COPY killed [[S_MOV_B64_]]
+  ; GCN-NEXT:   [[V_CMP_LT_I64_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I64_e64 killed [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
+  ; GCN-NEXT:   [[S_OR_B64_:%[0-9]+]]:sreg_64_xexec = S_OR_B64 killed [[V_CMP_LT_I64_e64_]], killed [[COPY7]], implicit-def dead $scc
   ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed [[S_OR_B64_]], implicit $exec
   ; GCN-NEXT:   BUFFER_STORE_BYTE_OFFSET killed [[V_CNDMASK_B32_e64_]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec :: (store (s8) into %ir.2, addrspace 1)
   ; GCN-NEXT:   S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 80c0d0f45eb97..508bd78785b64 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -830,16 +830,16 @@ define amdgpu_kernel void @v_ashr_32_i64(ptr addrspace(1) %out, ptr addrspace(1)
 define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
 ; SI-LABEL: s_ashr_33_i64:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x14
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x1d
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_ashr_i32 s6, s7, 31
-; SI-NEXT:    s_ashr_i32 s7, s7, 1
-; SI-NEXT:    s_add_u32 s4, s7, s4
-; SI-NEXT:    s_addc_u32 s5, s6, s5
+; SI-NEXT:    s_ashr_i32 s7, s6, 31
+; SI-NEXT:    s_ashr_i32 s6, s6, 1
+; SI-NEXT:    s_add_u32 s4, s6, s4
+; SI-NEXT:    s_addc_u32 s5, s7, s5
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -847,16 +847,16 @@ define amdgpu_kernel void @s_ashr_33_i64(ptr addrspace(1) %out, [8 x i32], i64 %
 ;
 ; VI-LABEL: s_ashr_33_i64:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; VI-NEXT:    s_load_dword s6, s[4:5], 0x50
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x74
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_ashr_i32 s6, s7, 31
-; VI-NEXT:    s_ashr_i32 s7, s7, 1
-; VI-NEXT:    s_add_u32 s4, s7, s4
-; VI-NEXT:    s_addc_u32 s5, s6, s5
+; VI-NEXT:    s_ashr_i32 s7, s6, 31
+; VI-NEXT:    s_ashr_i32 s6, s6, 1
+; VI-NEXT:    s_add_u32 s4, s6, s4
+; VI-NEXT:    s_addc_u32 s5, s7, s5
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -953,16 +953,16 @@ define amdgpu_kernel void @v_ashr_33_i64(ptr addrspace(1) %out, ptr addrspace(1)
 define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
 ; SI-LABEL: s_ashr_62_i64:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
+; SI-NEXT:    s_load_dword s6, s[4:5], 0x14
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x1d
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_ashr_i32 s6, s7, 31
-; SI-NEXT:    s_ashr_i32 s7, s7, 30
-; SI-NEXT:    s_add_u32 s4, s7, s4
-; SI-NEXT:    s_addc_u32 s5, s6, s5
+; SI-NEXT:    s_ashr_i32 s7, s6, 31
+; SI-NEXT:    s_ashr_i32 s6, s6, 30
+; SI-NEXT:    s_add_u32 s4, s6, s4
+; SI-NEXT:    s_addc_u32 s5, s7, s5
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -970,16 +970,16 @@ define amdgpu_kernel void @s_ashr_62_i64(ptr addrspace(1) %out, [8 x i32], i64 %
 ;
 ; VI-LABEL: s_ashr_62_i64:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x4c
+; VI-NEXT:    s_load_dword s6, s[4:5], 0x50
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x74
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_ashr_i32 s6, s7, 31
-; VI-NEXT:    s_ashr_i32 s7, s7, 30
-; VI-NEXT:    s_add_u32 s4, s7, s4
-; VI-NEXT:    s_addc_u32 s5, s6, s5
+; VI-NEXT:    s_ashr_i32 s7, s6, 31
+; VI-NEXT:    s_ashr_i32 s6, s6, 30
+; VI-NEXT:    s_add_u32 s4, s6, s4
+; VI-NEXT:    s_addc_u32 s5, s7, s5
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -1077,15 +1077,15 @@ define amdgpu_kernel void @v_ashr_62_i64(ptr addrspace(1) %out, ptr addrspace(1)
 define amdgpu_kernel void @s_ashr_63_i64(ptr addrspace(1) %out, [8 x i32], i64 %a, [8 x i32], i64 %b) {
 ; SI-LABEL: s_ashr_63_i64:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x13
-; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x1d
+; SI-NEXT:    s_load_dword s8, s[4:5], 0x14
+; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x1d
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_ashr_i32 s5, s7, 31
-; SI-NEXT:    s_add_u32 s4, s5, s8
-; SI-NEXT:    s_addc_u32 s5, s5, s9
+; SI-NEXT:    s_ashr_i32 s5, s8, 31
+; SI-NEXT:    s_add_u32 s4, s5, s6
+; SI-NEXT:    s_addc_u32 s5, s5, s7
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
 ; SI-NEXT:    v_mov_b32_e32 v1, s5
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -1093,15 +1093,15 @@ define amdgpu_kernel void @s_ashr_63_i64(ptr addrspace(1) %out, [8 x i32], i64 %
 ;
 ; VI-LABEL: s_ashr_63_i64:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x4c
-; VI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x74
+; VI-NEXT:    s_load_dword s8, s[4:5], 0x50
+; VI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x74
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_ashr_i32 s5, s7, 31
-; VI-NEXT:    s_add_u32 s4, s5, s8
-; VI-NEXT:    s_addc_u32 s5, s5, s9
+; VI-NEXT:    s_ashr_i32 s5, s8, 31
+; VI-NEXT:    s_add_u32 s4, s5, s6
+; VI-NEXT:    s_addc_u32 s5, s5, s7
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s5
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 2d95875cad882..c9f9ec5365685 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -1129,25 +1129,25 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
 ; GCN-IR-LABEL: s_test_srem33_64:
 ; GCN-IR:       ; %bb.0: ; %_udiv-special-cases
 ; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GCN-IR-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
+; GCN-IR-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
 ; GCN-IR-NEXT:    s_mov_b32 s13, 0
 ; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT:    s_ashr_i64 s[2:3], s[2:3], 31
-; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[4:5], 31
 ; GCN-IR-NEXT:    s_ashr_i32 s4, s3, 31
+; GCN-IR-NEXT:    s_ashr_i64 s[6:7], s[2:3], 31
 ; GCN-IR-NEXT:    s_mov_b32 s5, s4
-; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GCN-IR-NEXT:    s_ashr_i64 s[10:11], s[8:9], 31
+; GCN-IR-NEXT:    s_xor_b64 s[2:3], s[6:7], s[4:5]
 ; GCN-IR-NEXT:    s_sub_u32 s6, s2, s4
 ; GCN-IR-NEXT:    s_subb_u32 s7, s3, s4
 ; GCN-IR-NEXT:    s_ashr_i32 s2, s9, 31
 ; GCN-IR-NEXT:    s_mov_b32 s3, s2
-; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[8:9], s[2:3]
+; GCN-IR-NEXT:    s_xor_b64 s[8:9], s[10:11], s[2:3]
 ; GCN-IR-NEXT:    s_sub_u32 s8, s8, s2
 ; GCN-IR-NEXT:    s_subb_u32 s9, s9, s2
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[6:7], 0
-; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[8:9], 0
+; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[6:7], 0
+; GCN-IR-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[8:9], 0
 ; GCN-IR-NEXT:    s_flbit_i32_b64 s12, s[8:9]
-; GCN-IR-NEXT:    s_or_b64 s[10:11], s[2:3], s[10:11]
+; GCN-IR-NEXT:    s_or_b64 s[10:11], s[10:11], s[2:3]
 ; GCN-IR-NEXT:    s_flbit_i32_b64 s20, s[6:7]
 ; GCN-IR-NEXT:    s_sub_u32 s14, s12, s20
 ; GCN-IR-NEXT:    s_subb_u32 s15, 0, 0
diff --git a/llvm/test/CodeGen/X86/oddsubvector.ll b/llvm/test/CodeGen/X86/oddsubvector.ll
index f53983036a016..d6406a3f00877 100644
--- a/llvm/test/CodeGen/X86/oddsubvector.ll
+++ b/llvm/test/CodeGen/X86/oddsubvector.ll
@@ -155,10 +155,10 @@ define <16 x i32> @PR42819(ptr %a0) {
 define void @PR42833() {
 ; SSE2-LABEL: PR42833:
 ; SSE2:       # %bb.0:
+; SSE2-NEXT:    movl b(%rip), %eax
 ; SSE2-NEXT:    movdqa c+144(%rip), %xmm2
 ; SSE2-NEXT:    movdqa c+128(%rip), %xmm0
-; SSE2-NEXT:    movd %xmm0, %eax
-; SSE2-NEXT:    addl b(%rip), %eax
+; SSE2-NEXT:    addl c+128(%rip), %eax
 ; SSE2-NEXT:    movd %eax, %xmm1
 ; SSE2-NEXT:    movd %eax, %xmm3
 ; SSE2-NEXT:    paddd %xmm0, %xmm3
@@ -191,10 +191,10 @@ define void @PR42833() {
 ;
 ; SSE42-LABEL: PR42833:
 ; SSE42:       # %bb.0:
+; SSE42-NEXT:    movl b(%rip), %eax
 ; SSE42-NEXT:    movdqa c+144(%rip), %xmm1
 ; SSE42-NEXT:    movdqa c+128(%rip), %xmm0
-; SSE42-NEXT:    movd %xmm0, %eax
-; SSE42-NEXT:    addl b(%rip), %eax
+; SSE42-NEXT:    addl c+128(%rip), %eax
 ; SSE42-NEXT:    movd %eax, %xmm2
 ; SSE42-NEXT:    paddd %xmm0, %xmm2
 ; SSE42-NEXT:    movdqa d+144(%rip), %xmm3
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index 412455384e937..6c947254e5e7c 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -23,26 +23,26 @@ define void @f() nounwind {
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
 ; X86-NEXT:    subl $160, %esp
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shll $30, %ecx
+; X86-NEXT:    movl %ecx, %edi
+; X86-NEXT:    sarl $30, %edi
+; X86-NEXT:    sarl $31, %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl (%eax), %eax
-; X86-NEXT:    movzbl (%eax), %ecx
+; X86-NEXT:    movzbl (%eax), %ebx
 ; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT:    divb %cl
-; X86-NEXT:    movl %edi, %eax
-; X86-NEXT:    shll $30, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    sarl $30, %ecx
-; X86-NEXT:    sarl $31, %eax
-; X86-NEXT:    xorl %eax, %edi
-; X86-NEXT:    xorl %eax, %edx
-; X86-NEXT:    shrdl $1, %eax, %ecx
-; X86-NEXT:    xorl %ecx, %esi
-; X86-NEXT:    subl %ecx, %esi
-; X86-NEXT:    sbbl %eax, %edx
-; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT:    divb %bl
+; X86-NEXT:    xorl %ecx, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    shldl $31, %edi, %eax
+; X86-NEXT:    xorl %ecx, %edi
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    subl %eax, %esi
+; X86-NEXT:    sbbl %ecx, %edx
+; X86-NEXT:    sbbl %ecx, %edi
 ; X86-NEXT:    movl %edi, %ecx
 ; X86-NEXT:    shldl $30, %edx, %ecx
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -114,16 +114,16 @@ define void @f() nounwind {
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 112(%esp,%esi), %eax
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 112(%esp,%esi), %edi
-; X86-NEXT:    movl 116(%esp,%esi), %eax
+; X86-NEXT:    movl 116(%esp,%esi), %edi
 ; X86-NEXT:    movl 120(%esp,%esi), %esi
-; X86-NEXT:    shldl %cl, %eax, %esi
+; X86-NEXT:    shldl %cl, %edi, %esi
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shldl %cl, %edi, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shll %cl, %edi
+; X86-NEXT:    shldl %cl, %eax, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill

>From 7fd8493dab964ec1a756995a5991a8af95619ace Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 26 Sep 2025 15:01:55 +0100
Subject: [PATCH 2/2] update missed tests

---
 llvm/test/CodeGen/NVPTX/i128.ll      | 94 ++++++++++++++--------------
 llvm/test/CodeGen/SystemZ/pr60413.ll | 50 +++++++--------
 2 files changed, 70 insertions(+), 74 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index cdbbabe3e3b05..23a54662bfb06 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -13,14 +13,14 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd8, %rd9}, [srem_i128_param_0];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd10, %rd11}, [srem_i128_param_1];
 ; CHECK-NEXT:    shr.s64 %rd1, %rd9, 63;
+; CHECK-NEXT:    setp.lt.s64 %p1, %rd9, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd12, 0, %rd8;
 ; CHECK-NEXT:    subc.cc.s64 %rd13, 0, %rd9;
-; CHECK-NEXT:    setp.lt.s64 %p1, %rd9, 0;
 ; CHECK-NEXT:    selp.b64 %rd3, %rd13, %rd9, %p1;
 ; CHECK-NEXT:    selp.b64 %rd2, %rd12, %rd8, %p1;
+; CHECK-NEXT:    setp.lt.s64 %p2, %rd11, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd14, 0, %rd10;
 ; CHECK-NEXT:    subc.cc.s64 %rd15, 0, %rd11;
-; CHECK-NEXT:    setp.lt.s64 %p2, %rd11, 0;
 ; CHECK-NEXT:    selp.b64 %rd5, %rd15, %rd11, %p2;
 ; CHECK-NEXT:    selp.b64 %rd4, %rd14, %rd10, %p2;
 ; CHECK-NEXT:    or.b64 %rd16, %rd4, %rd5;
@@ -151,24 +151,24 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    .reg .b64 %rd<66>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
-; CHECK-NEXT:    ld.param.v2.b64 {%rd5, %rd6}, [urem_i128_param_0];
-; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_param_1];
-; CHECK-NEXT:    or.b64 %rd7, %rd1, %rd2;
+; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [urem_i128_param_0];
+; CHECK-NEXT:    ld.param.v2.b64 {%rd3, %rd4}, [urem_i128_param_1];
+; CHECK-NEXT:    or.b64 %rd7, %rd3, %rd4;
 ; CHECK-NEXT:    setp.eq.b64 %p1, %rd7, 0;
-; CHECK-NEXT:    or.b64 %rd8, %rd5, %rd6;
+; CHECK-NEXT:    or.b64 %rd8, %rd1, %rd2;
 ; CHECK-NEXT:    setp.eq.b64 %p2, %rd8, 0;
 ; CHECK-NEXT:    or.pred %p3, %p1, %p2;
-; CHECK-NEXT:    setp.ne.b64 %p4, %rd2, 0;
-; CHECK-NEXT:    clz.b64 %r1, %rd2;
+; CHECK-NEXT:    setp.ne.b64 %p4, %rd4, 0;
+; CHECK-NEXT:    clz.b64 %r1, %rd4;
 ; CHECK-NEXT:    cvt.u64.u32 %rd9, %r1;
-; CHECK-NEXT:    clz.b64 %r2, %rd1;
+; CHECK-NEXT:    clz.b64 %r2, %rd3;
 ; CHECK-NEXT:    cvt.u64.u32 %rd10, %r2;
 ; CHECK-NEXT:    add.s64 %rd11, %rd10, 64;
 ; CHECK-NEXT:    selp.b64 %rd12, %rd9, %rd11, %p4;
-; CHECK-NEXT:    setp.ne.b64 %p5, %rd6, 0;
-; CHECK-NEXT:    clz.b64 %r3, %rd6;
+; CHECK-NEXT:    setp.ne.b64 %p5, %rd2, 0;
+; CHECK-NEXT:    clz.b64 %r3, %rd2;
 ; CHECK-NEXT:    cvt.u64.u32 %rd13, %r3;
-; CHECK-NEXT:    clz.b64 %r4, %rd5;
+; CHECK-NEXT:    clz.b64 %r4, %rd1;
 ; CHECK-NEXT:    cvt.u64.u32 %rd14, %r4;
 ; CHECK-NEXT:    add.s64 %rd15, %rd14, 64;
 ; CHECK-NEXT:    selp.b64 %rd16, %rd13, %rd15, %p5;
@@ -184,8 +184,8 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    xor.b64 %rd19, %rd17, 127;
 ; CHECK-NEXT:    or.b64 %rd20, %rd19, %rd18;
 ; CHECK-NEXT:    setp.eq.b64 %p12, %rd20, 0;
-; CHECK-NEXT:    selp.b64 %rd65, 0, %rd6, %p11;
-; CHECK-NEXT:    selp.b64 %rd64, 0, %rd5, %p11;
+; CHECK-NEXT:    selp.b64 %rd65, 0, %rd2, %p11;
+; CHECK-NEXT:    selp.b64 %rd64, 0, %rd1, %p11;
 ; CHECK-NEXT:    or.pred %p13, %p11, %p12;
 ; CHECK-NEXT:    @%p13 bra $L__BB1_5;
 ; CHECK-NEXT:  // %bb.3: // %udiv-bb1
@@ -195,30 +195,30 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    setp.eq.b64 %p14, %rd21, 0;
 ; CHECK-NEXT:    cvt.u32.u64 %r5, %rd17;
 ; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
-; CHECK-NEXT:    shl.b64 %rd22, %rd6, %r6;
+; CHECK-NEXT:    shl.b64 %rd22, %rd2, %r6;
 ; CHECK-NEXT:    sub.s32 %r7, 64, %r6;
-; CHECK-NEXT:    shr.u64 %rd23, %rd5, %r7;
+; CHECK-NEXT:    shr.u64 %rd23, %rd1, %r7;
 ; CHECK-NEXT:    or.b64 %rd24, %rd22, %rd23;
 ; CHECK-NEXT:    sub.s32 %r8, 63, %r5;
-; CHECK-NEXT:    shl.b64 %rd25, %rd5, %r8;
+; CHECK-NEXT:    shl.b64 %rd25, %rd1, %r8;
 ; CHECK-NEXT:    setp.gt.s32 %p15, %r6, 63;
 ; CHECK-NEXT:    selp.b64 %rd63, %rd25, %rd24, %p15;
-; CHECK-NEXT:    shl.b64 %rd62, %rd5, %r6;
+; CHECK-NEXT:    shl.b64 %rd62, %rd1, %r6;
 ; CHECK-NEXT:    mov.b64 %rd56, %rd57;
 ; CHECK-NEXT:    @%p14 bra $L__BB1_4;
 ; CHECK-NEXT:  // %bb.1: // %udiv-preheader
 ; CHECK-NEXT:    cvt.u32.u64 %r9, %rd58;
-; CHECK-NEXT:    shr.u64 %rd26, %rd5, %r9;
+; CHECK-NEXT:    shr.u64 %rd26, %rd1, %r9;
 ; CHECK-NEXT:    sub.s32 %r10, 64, %r9;
-; CHECK-NEXT:    shl.b64 %rd27, %rd6, %r10;
+; CHECK-NEXT:    shl.b64 %rd27, %rd2, %r10;
 ; CHECK-NEXT:    or.b64 %rd28, %rd26, %rd27;
 ; CHECK-NEXT:    add.s32 %r11, %r9, -64;
-; CHECK-NEXT:    shr.u64 %rd29, %rd6, %r11;
+; CHECK-NEXT:    shr.u64 %rd29, %rd2, %r11;
 ; CHECK-NEXT:    setp.gt.s32 %p16, %r9, 63;
 ; CHECK-NEXT:    selp.b64 %rd60, %rd29, %rd28, %p16;
-; CHECK-NEXT:    shr.u64 %rd61, %rd6, %r9;
-; CHECK-NEXT:    add.cc.s64 %rd3, %rd1, -1;
-; CHECK-NEXT:    addc.cc.s64 %rd4, %rd2, -1;
+; CHECK-NEXT:    shr.u64 %rd61, %rd2, %r9;
+; CHECK-NEXT:    add.cc.s64 %rd5, %rd3, -1;
+; CHECK-NEXT:    addc.cc.s64 %rd6, %rd4, -1;
 ; CHECK-NEXT:    mov.b64 %rd56, 0;
 ; CHECK-NEXT:    mov.b64 %rd57, %rd56;
 ; CHECK-NEXT:  $L__BB1_2: // %udiv-do-while
@@ -235,12 +235,12 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    shl.b64 %rd39, %rd62, 1;
 ; CHECK-NEXT:    or.b64 %rd62, %rd57, %rd39;
 ; CHECK-NEXT:    or.b64 %rd63, %rd56, %rd38;
-; CHECK-NEXT:    sub.cc.s64 %rd40, %rd3, %rd35;
-; CHECK-NEXT:    subc.cc.s64 %rd41, %rd4, %rd32;
+; CHECK-NEXT:    sub.cc.s64 %rd40, %rd5, %rd35;
+; CHECK-NEXT:    subc.cc.s64 %rd41, %rd6, %rd32;
 ; CHECK-NEXT:    shr.s64 %rd42, %rd41, 63;
 ; CHECK-NEXT:    and.b64 %rd57, %rd42, 1;
-; CHECK-NEXT:    and.b64 %rd43, %rd42, %rd1;
-; CHECK-NEXT:    and.b64 %rd44, %rd42, %rd2;
+; CHECK-NEXT:    and.b64 %rd43, %rd42, %rd3;
+; CHECK-NEXT:    and.b64 %rd44, %rd42, %rd4;
 ; CHECK-NEXT:    sub.cc.s64 %rd60, %rd35, %rd43;
 ; CHECK-NEXT:    subc.cc.s64 %rd61, %rd32, %rd44;
 ; CHECK-NEXT:    add.cc.s64 %rd58, %rd58, -1;
@@ -257,12 +257,12 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    or.b64 %rd64, %rd57, %rd49;
 ; CHECK-NEXT:    or.b64 %rd65, %rd56, %rd48;
 ; CHECK-NEXT:  $L__BB1_5: // %udiv-end
-; CHECK-NEXT:    mul.hi.u64 %rd50, %rd1, %rd64;
-; CHECK-NEXT:    mad.lo.s64 %rd51, %rd1, %rd65, %rd50;
-; CHECK-NEXT:    mad.lo.s64 %rd52, %rd2, %rd64, %rd51;
-; CHECK-NEXT:    mul.lo.s64 %rd53, %rd1, %rd64;
-; CHECK-NEXT:    sub.cc.s64 %rd54, %rd5, %rd53;
-; CHECK-NEXT:    subc.cc.s64 %rd55, %rd6, %rd52;
+; CHECK-NEXT:    mul.hi.u64 %rd50, %rd3, %rd64;
+; CHECK-NEXT:    mad.lo.s64 %rd51, %rd3, %rd65, %rd50;
+; CHECK-NEXT:    mad.lo.s64 %rd52, %rd4, %rd64, %rd51;
+; CHECK-NEXT:    mul.lo.s64 %rd53, %rd3, %rd64;
+; CHECK-NEXT:    sub.cc.s64 %rd54, %rd1, %rd53;
+; CHECK-NEXT:    subc.cc.s64 %rd55, %rd2, %rd52;
 ; CHECK-NEXT:    st.param.v2.b64 [func_retval0], {%rd54, %rd55};
 ; CHECK-NEXT:    ret;
   %div = urem i128 %lhs, %rhs
@@ -313,14 +313,14 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd8, %rd9}, [sdiv_i128_param_0];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd10, %rd11}, [sdiv_i128_param_1];
+; CHECK-NEXT:    setp.lt.s64 %p1, %rd9, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd12, 0, %rd8;
 ; CHECK-NEXT:    subc.cc.s64 %rd13, 0, %rd9;
-; CHECK-NEXT:    setp.lt.s64 %p1, %rd9, 0;
 ; CHECK-NEXT:    selp.b64 %rd2, %rd13, %rd9, %p1;
 ; CHECK-NEXT:    selp.b64 %rd1, %rd12, %rd8, %p1;
+; CHECK-NEXT:    setp.lt.s64 %p2, %rd11, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd14, 0, %rd10;
 ; CHECK-NEXT:    subc.cc.s64 %rd15, 0, %rd11;
-; CHECK-NEXT:    setp.lt.s64 %p2, %rd11, 0;
 ; CHECK-NEXT:    selp.b64 %rd4, %rd15, %rd11, %p2;
 ; CHECK-NEXT:    selp.b64 %rd3, %rd14, %rd10, %p2;
 ; CHECK-NEXT:    xor.b64 %rd16, %rd11, %rd9;
@@ -448,16 +448,16 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0: // %_udiv-special-cases
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd3, %rd4}, [udiv_i128_param_0];
-; CHECK-NEXT:    ld.param.v2.b64 {%rd5, %rd6}, [udiv_i128_param_1];
-; CHECK-NEXT:    or.b64 %rd7, %rd5, %rd6;
+; CHECK-NEXT:    ld.param.v2.b64 {%rd1, %rd2}, [udiv_i128_param_1];
+; CHECK-NEXT:    or.b64 %rd7, %rd1, %rd2;
 ; CHECK-NEXT:    setp.eq.b64 %p1, %rd7, 0;
 ; CHECK-NEXT:    or.b64 %rd8, %rd3, %rd4;
 ; CHECK-NEXT:    setp.eq.b64 %p2, %rd8, 0;
 ; CHECK-NEXT:    or.pred %p3, %p1, %p2;
-; CHECK-NEXT:    setp.ne.b64 %p4, %rd6, 0;
-; CHECK-NEXT:    clz.b64 %r1, %rd6;
+; CHECK-NEXT:    setp.ne.b64 %p4, %rd2, 0;
+; CHECK-NEXT:    clz.b64 %r1, %rd2;
 ; CHECK-NEXT:    cvt.u64.u32 %rd9, %r1;
-; CHECK-NEXT:    clz.b64 %r2, %rd5;
+; CHECK-NEXT:    clz.b64 %r2, %rd1;
 ; CHECK-NEXT:    cvt.u64.u32 %rd10, %r2;
 ; CHECK-NEXT:    add.s64 %rd11, %rd10, 64;
 ; CHECK-NEXT:    selp.b64 %rd12, %rd9, %rd11, %p4;
@@ -513,8 +513,8 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    setp.gt.s32 %p16, %r9, 63;
 ; CHECK-NEXT:    selp.b64 %rd54, %rd29, %rd28, %p16;
 ; CHECK-NEXT:    shr.u64 %rd55, %rd4, %r9;
-; CHECK-NEXT:    add.cc.s64 %rd1, %rd5, -1;
-; CHECK-NEXT:    addc.cc.s64 %rd2, %rd6, -1;
+; CHECK-NEXT:    add.cc.s64 %rd5, %rd1, -1;
+; CHECK-NEXT:    addc.cc.s64 %rd6, %rd2, -1;
 ; CHECK-NEXT:    mov.b64 %rd50, 0;
 ; CHECK-NEXT:    mov.b64 %rd51, %rd50;
 ; CHECK-NEXT:  $L__BB5_2: // %udiv-do-while
@@ -531,12 +531,12 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    shl.b64 %rd39, %rd56, 1;
 ; CHECK-NEXT:    or.b64 %rd56, %rd51, %rd39;
 ; CHECK-NEXT:    or.b64 %rd57, %rd50, %rd38;
-; CHECK-NEXT:    sub.cc.s64 %rd40, %rd1, %rd35;
-; CHECK-NEXT:    subc.cc.s64 %rd41, %rd2, %rd32;
+; CHECK-NEXT:    sub.cc.s64 %rd40, %rd5, %rd35;
+; CHECK-NEXT:    subc.cc.s64 %rd41, %rd6, %rd32;
 ; CHECK-NEXT:    shr.s64 %rd42, %rd41, 63;
 ; CHECK-NEXT:    and.b64 %rd51, %rd42, 1;
-; CHECK-NEXT:    and.b64 %rd43, %rd42, %rd5;
-; CHECK-NEXT:    and.b64 %rd44, %rd42, %rd6;
+; CHECK-NEXT:    and.b64 %rd43, %rd42, %rd1;
+; CHECK-NEXT:    and.b64 %rd44, %rd42, %rd2;
 ; CHECK-NEXT:    sub.cc.s64 %rd54, %rd35, %rd43;
 ; CHECK-NEXT:    subc.cc.s64 %rd55, %rd32, %rd44;
 ; CHECK-NEXT:    add.cc.s64 %rd52, %rd52, -1;
diff --git a/llvm/test/CodeGen/SystemZ/pr60413.ll b/llvm/test/CodeGen/SystemZ/pr60413.ll
index 8a6a30318ae58..45b0d74b8db6a 100644
--- a/llvm/test/CodeGen/SystemZ/pr60413.ll
+++ b/llvm/test/CodeGen/SystemZ/pr60413.ll
@@ -15,52 +15,48 @@ define dso_local void @m() local_unnamed_addr #1 {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    stmg %r13, %r15, 104(%r15)
 ; CHECK-NEXT:    aghi %r15, -168
-; CHECK-NEXT:    lhrl %r1, f+4
+; CHECK-NEXT:    llhrl %r1, f+4
 ; CHECK-NEXT:    sll %r1, 8
 ; CHECK-NEXT:    larl %r2, f
 ; CHECK-NEXT:    ic %r1, 6(%r2)
+; CHECK-NEXT:    vlvgp %v0, %r1, %r1
+; CHECK-NEXT:    vrepf %v1, %v0, 1
+; CHECK-NEXT:    vlr %v2, %v1
+; CHECK-NEXT:    vrepif %v0, 127
+; CHECK-NEXT:    vchlf %v3, %v1, %v0
+; CHECK-NEXT:    vlvgf %v1, %r0, 0
+; CHECK-NEXT:    vlvgf %v1, %r0, 1
+; CHECK-NEXT:    vlvgf %v1, %r0, 2
+; CHECK-NEXT:    vgbm %v4, 30583
+; CHECK-NEXT:    vn %v1, %v1, %v4
+; CHECK-NEXT:    vlvgf %v2, %r0, 3
+; CHECK-NEXT:    vn %v2, %v2, %v4
 ; CHECK-NEXT:    larl %r2, e
 ; CHECK-NEXT:    lb %r0, 3(%r2)
-; CHECK-NEXT:    vlvgp %v0, %r0, %r1
-; CHECK-NEXT:    vlvgp %v1, %r1, %r0
-; CHECK-NEXT:    vlvgf %v1, %r1, 0
-; CHECK-NEXT:    vlvgf %v1, %r1, 2
-; CHECK-NEXT:    vlvgp %v2, %r1, %r1
-; CHECK-NEXT:    # kill: def $r1l killed $r1l killed $r1d
-; CHECK-NEXT:    nilh %r1, 255
 ; CHECK-NEXT:    chi %r1, 128
 ; CHECK-NEXT:    ipm %r1
 ; CHECK-NEXT:    risbg %r1, %r1, 63, 191, 36
-; CHECK-NEXT:    vlvgf %v0, %r0, 0
-; CHECK-NEXT:    vlvgf %v0, %r0, 2
-; CHECK-NEXT:    vgbm %v3, 30583
-; CHECK-NEXT:    vn %v0, %v0, %v3
-; CHECK-NEXT:    vn %v1, %v1, %v3
-; CHECK-NEXT:    vrepf %v2, %v2, 1
-; CHECK-NEXT:    vn %v2, %v2, %v3
-; CHECK-NEXT:    vrepif %v3, 127
-; CHECK-NEXT:    vchlf %v1, %v1, %v3
-; CHECK-NEXT:    vlgvf %r13, %v1, 0
-; CHECK-NEXT:    vchlf %v2, %v2, %v3
-; CHECK-NEXT:    vlgvf %r3, %v2, 1
+; CHECK-NEXT:    vlgvf %r3, %v3, 1
 ; CHECK-NEXT:    nilf %r3, 1
-; CHECK-NEXT:    vlgvf %r4, %v2, 0
+; CHECK-NEXT:    vlgvf %r4, %v3, 0
 ; CHECK-NEXT:    risbg %r2, %r4, 48, 176, 15
 ; CHECK-NEXT:    rosbg %r2, %r3, 32, 49, 14
-; CHECK-NEXT:    vlgvf %r5, %v2, 2
+; CHECK-NEXT:    vlgvf %r5, %v3, 2
 ; CHECK-NEXT:    nilf %r5, 1
 ; CHECK-NEXT:    rosbg %r2, %r5, 32, 50, 13
-; CHECK-NEXT:    vlgvf %r14, %v2, 3
+; CHECK-NEXT:    vlgvf %r14, %v3, 3
 ; CHECK-NEXT:    nilf %r14, 1
 ; CHECK-NEXT:    rosbg %r2, %r14, 32, 51, 12
+; CHECK-NEXT:    vchlf %v2, %v2, %v0
+; CHECK-NEXT:    vlgvf %r13, %v2, 0
 ; CHECK-NEXT:    rosbg %r2, %r13, 52, 52, 11
-; CHECK-NEXT:    vlgvf %r13, %v1, 1
+; CHECK-NEXT:    vlgvf %r13, %v2, 1
 ; CHECK-NEXT:    rosbg %r2, %r13, 53, 53, 10
-; CHECK-NEXT:    vlgvf %r13, %v1, 2
+; CHECK-NEXT:    vlgvf %r13, %v2, 2
 ; CHECK-NEXT:    rosbg %r2, %r13, 54, 54, 9
-; CHECK-NEXT:    vlgvf %r13, %v1, 3
+; CHECK-NEXT:    vlgvf %r13, %v2, 3
 ; CHECK-NEXT:    rosbg %r2, %r13, 55, 55, 8
-; CHECK-NEXT:    vchlf %v0, %v0, %v3
+; CHECK-NEXT:    vchlf %v0, %v1, %v0
 ; CHECK-NEXT:    vlgvf %r13, %v0, 0
 ; CHECK-NEXT:    rosbg %r2, %r13, 56, 56, 7
 ; CHECK-NEXT:    vlgvf %r13, %v0, 1



More information about the llvm-commits mailing list