[llvm] 3b34f3f - AMDGPU/GlobalISel: Fix obvious bug in ported 32-bit udiv/urem

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 16 19:47:06 PDT 2020


Author: Matt Arsenault
Date: 2020-06-16T22:46:35-04:00
New Revision: 3b34f3fccac93697065df620d454fcadceb4fa6e

URL: https://github.com/llvm/llvm-project/commit/3b34f3fccac93697065df620d454fcadceb4fa6e
DIFF: https://github.com/llvm/llvm-project/commit/3b34f3fccac93697065df620d454fcadceb4fa6e.diff

LOG: AMDGPU/GlobalISel: Fix obvious bug in ported 32-bit udiv/urem

The sign of the divisor was computed from the dividend (LHS) instead of
the RHS, a copy-paste error in the signed expansion. This was hidden by
the IR expansion in AMDGPUCodeGenPrepare, which I forgot to turn off.
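
For context, this expansion lowers signed 32-bit division to the
unsigned one with a branchless sign fixup: negate each operand by its
own sign mask, divide unsigned, then apply the XOR of the two sign
masks to the quotient. With the bug, both masks came from the LHS, so
the final fixup mask was LHSign ^ LHSign == 0 (visible in the old
checks as "v_xor_b32_e32 v1, v2, v2") and negative quotients were
never restored. A minimal C sketch of the identity, with illustrative
names (not the LLVM code) and ignoring the INT_MIN edge case:

    #include <stdint.h>

    /* Branchless i32 sdiv in terms of udiv, mirroring the legalizer
       expansion. x >> 31 (arithmetic shift assumed) yields an
       all-ones mask for negative x, zero otherwise. */
    static int32_t sdiv_via_udiv(int32_t lhs, int32_t rhs) {
      int32_t lsign = lhs >> 31;                /* 0 or -1 */
      int32_t rsign = rhs >> 31;                /* the bug used lhs here */
      uint32_t ul = (uint32_t)((lhs + lsign) ^ lsign);  /* |lhs| */
      uint32_t ur = (uint32_t)((rhs + rsign) ^ rsign);  /* |rhs| */
      uint32_t uq = ul / ur;       /* stands in for the udiv expansion */
      int32_t qsign = lsign ^ rsign;            /* quotient sign mask */
      return ((int32_t)uq ^ qsign) - qsign;     /* conditional negate */
    }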

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 0ee1a6b716cc..902d960a9712 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2814,7 +2814,7 @@ bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI,
 
   auto ThirtyOne = B.buildConstant(S32, 31);
   auto LHSign = B.buildAShr(S32, LHS, ThirtyOne);
-  auto RHSign = B.buildAShr(S32, LHS, ThirtyOne);
+  auto RHSign = B.buildAShr(S32, RHS, ThirtyOne);
 
   LHS = B.buildAdd(S32, LHS, LHSign).getReg(0);
   RHS = B.buildAdd(S32, RHS, RHSign).getReg(0);

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
index 9e69ea9f89ae..8a569c68b1f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir
@@ -14,7 +14,7 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -54,7 +54,7 @@ body: |
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -94,7 +94,7 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -148,7 +148,7 @@ body: |
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -183,7 +183,7 @@ body: |
     ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
     ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -224,7 +224,7 @@ body: |
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -259,7 +259,7 @@ body: |
     ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
     ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -300,7 +300,7 @@ body: |
     ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -335,7 +335,7 @@ body: |
     ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]]
     ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]]
     ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -456,7 +456,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -501,7 +501,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -546,7 +546,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -611,7 +611,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -650,7 +650,7 @@ body: |
     ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -705,7 +705,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -744,7 +744,7 @@ body: |
     ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -799,7 +799,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -838,7 +838,7 @@ body: |
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -895,7 +895,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -940,7 +940,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -985,7 +985,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1045,7 +1045,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1090,7 +1090,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1135,7 +1135,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
index 431126348001..0eb03135e18b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir
@@ -14,7 +14,7 @@ body: |
     ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -52,7 +52,7 @@ body: |
     ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -90,7 +90,7 @@ body: |
     ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[COPY1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -142,7 +142,7 @@ body: |
     ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -175,7 +175,7 @@ body: |
     ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
     ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -215,7 +215,7 @@ body: |
     ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -248,7 +248,7 @@ body: |
     ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
     ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -288,7 +288,7 @@ body: |
     ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -321,7 +321,7 @@ body: |
     ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]]
     ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]]
     ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
-    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32)
     ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]]
     ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]]
     ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -441,7 +441,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -486,7 +486,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -531,7 +531,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -596,7 +596,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -633,7 +633,7 @@ body: |
     ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -687,7 +687,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -724,7 +724,7 @@ body: |
     ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -778,7 +778,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16
     ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C1]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C1]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -815,7 +815,7 @@ body: |
     ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
     ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16
     ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
-    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32)
+    ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32)
     ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]]
     ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]]
     ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]]
@@ -871,7 +871,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -914,7 +914,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -957,7 +957,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1015,7 +1015,7 @@ body: |
     ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1058,7 +1058,7 @@ body: |
     ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX8: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]
@@ -1101,7 +1101,7 @@ body: |
     ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17
     ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
     ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
-    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG]], [[C]](s32)
+    ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG1]], [[C]](s32)
     ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[ASHR]]
     ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG1]], [[ASHR1]]
     ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ADD]], [[ASHR]]

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index d10c99e09f7b..19c359b9a4f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -9,34 +9,35 @@ define i32 @v_sdiv_i32(i32 %num, i32 %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
+; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v1
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v4
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v4, v5
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
 ; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v6, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -99,16 +100,17 @@ define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_sdiv_i32:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_ashr_i32 s2, s0, 31
+; GISEL-NEXT:    s_ashr_i32 s3, s1, 31
 ; GISEL-NEXT:    s_add_i32 s0, s0, s2
-; GISEL-NEXT:    s_add_i32 s1, s1, s2
-; GISEL-NEXT:    s_xor_b32 s3, s0, s2
-; GISEL-NEXT:    s_xor_b32 s4, s1, s2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s4
+; GISEL-NEXT:    s_add_i32 s1, s1, s3
+; GISEL-NEXT:    s_xor_b32 s4, s0, s2
+; GISEL-NEXT:    s_xor_b32 s5, s1, s3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
-; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s4
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s5
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s5
 ; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -116,17 +118,17 @@ define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s3
-; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s4
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s4
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s5
 ; GISEL-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
 ; GISEL-NEXT:    v_subrev_i32_e32 v3, vcc, 1, v0
-; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, s3, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, s3, v1
-; GISEL-NEXT:    v_cmp_le_u32_e64 s[0:1], s4, v4
+; GISEL-NEXT:    v_sub_i32_e32 v4, vcc, s4, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, s4, v1
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[0:1], s5, v4
 ; GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v0, vcc
-; GISEL-NEXT:    s_xor_b32 s0, s2, s2
+; GISEL-NEXT:    s_xor_b32 s0, s2, s3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GISEL-NEXT:    v_subrev_i32_e32 v0, vcc, s0, v0
 ; GISEL-NEXT:    v_readfirstlane_b32 s0, v0
@@ -192,17 +194,19 @@ define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
@@ -211,48 +215,48 @@ define <2 x i32> @v_sdiv_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
-; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
-; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
-; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v5
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v6
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v5, v7
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
 ; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
 ; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[6:7]
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32:
@@ -353,35 +357,33 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
 ; CHECK-LABEL: v_sdiv_i32_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 12, v2
+; CHECK-NEXT:    v_mul_hi_u32 v4, v2, s6
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
-; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 12, v2
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v6
 ; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -390,74 +392,128 @@ define i32 @v_sdiv_i32_pow2k_denom(i32 %num) {
 }
 
 define <2 x i32> @v_sdiv_v2i32_pow2k_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_sdiv_v2i32_pow2k_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s4, 0x1000
-; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
-; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
-; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_xor_b32_e32 v7, v4, v4
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v2, v5, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
-; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v3
-; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
-; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
-; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v4
-; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v5
-; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
-; CHECK-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
-; CHECK-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v0
-; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v1
-; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
-; CHECK-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
-; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
-; CHECK-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
-; CHECK-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
-; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[6:7]
-; CHECK-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_sdiv_v2i32_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    s_add_i32 s8, 0x1000, 0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, s8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s8
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, s8
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v11, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v6
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v4, v7
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, s8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v9
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v9
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v12
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[8:9], s8, v0
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v7, s[6:7]
+; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v4, v10, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i32_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s4
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v5, s4
+; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v6
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v7
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v9
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v9, v12, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v5, v7
+; CGP-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v6, v8
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v1
+; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, 1, v5
+; CGP-NEXT:    v_lshlrev_b32_e32 v10, 12, v6
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
+; CGP-NEXT:    v_subrev_i32_e32 v12, vcc, 1, v6
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v0, v7
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v7
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v13, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CGP-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[6:7]
+; CGP-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v6, v11, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[4:5]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
@@ -466,35 +522,33 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 ; CHECK-LABEL: v_sdiv_i32_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, s6
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v2, s6
+; CHECK-NEXT:    v_mul_hi_u32 v4, v2, s6
+; CHECK-NEXT:    v_sub_i32_e32 v5, vcc, 0, v3
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
-; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v2
+; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v2
+; CHECK-NEXT:    v_add_i32_e64 v4, s[4:5], v2, v3
+; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v2, v3
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v2, v2, v0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v2, s6
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
+; CHECK-NEXT:    v_subrev_i32_e32 v5, vcc, 1, v2
+; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, v0, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
+; CHECK-NEXT:    v_cmp_le_u32_e64 s[4:5], s6, v6
 ; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v1
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v2, v4, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -503,74 +557,128 @@ define i32 @v_sdiv_i32_oddk_denom(i32 %num) {
 }
 
 define <2 x i32> @v_sdiv_v2i32_oddk_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_sdiv_v2i32_oddk_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
-; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
-; CHECK-NEXT:    v_xor_b32_e32 v6, v2, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_xor_b32_e32 v7, v4, v4
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v2, v5, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v3
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
-; CHECK-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
-; CHECK-NEXT:    v_mul_hi_u32 v9, v4, v2
-; CHECK-NEXT:    v_mul_lo_u32 v10, v5, v3
-; CHECK-NEXT:    v_mul_hi_u32 v11, v5, v3
-; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
-; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v4
-; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v5
-; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
-; CHECK-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
-; CHECK-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v0
-; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v1
-; CHECK-NEXT:    v_mul_lo_u32 v8, v4, v2
-; CHECK-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
-; CHECK-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
-; CHECK-NEXT:    v_mul_lo_u32 v11, v5, v3
-; CHECK-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
-; CHECK-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
-; CHECK-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
-; CHECK-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[6:7]
-; CHECK-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v6
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v7
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_sdiv_v2i32_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    s_add_i32 s8, 0x12d8fb, 0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s8
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, s8
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s8
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, s8
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v11, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v6
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v4, v7
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s8
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v5
+; GISEL-NEXT:    v_subrev_i32_e32 v8, vcc, 1, v5
+; GISEL-NEXT:    v_mul_lo_u32 v9, v4, s8
+; GISEL-NEXT:    v_add_i32_e32 v10, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v11, vcc, 1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v9
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v9
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v12
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[8:9], s8, v0
+; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v5, v7, s[6:7]
+; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v4, v10, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_sdiv_v2i32_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s8
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, v5, s8
+; CGP-NEXT:    v_mul_hi_u32 v8, v5, s8
+; CGP-NEXT:    v_mul_lo_u32 v9, v6, v3
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v7
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v9
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v9, v12, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v5, v7
+; CGP-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v6, v8
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v1
+; CGP-NEXT:    v_mul_lo_u32 v7, v5, s8
+; CGP-NEXT:    v_add_i32_e32 v8, vcc, 1, v5
+; CGP-NEXT:    v_subrev_i32_e32 v9, vcc, 1, v5
+; CGP-NEXT:    v_mul_lo_u32 v10, v6, v3
+; CGP-NEXT:    v_add_i32_e32 v11, vcc, 1, v6
+; CGP-NEXT:    v_subrev_i32_e32 v12, vcc, 1, v6
+; CGP-NEXT:    v_sub_i32_e32 v13, vcc, v0, v7
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v7
+; CGP-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v10
+; CGP-NEXT:    v_cmp_le_u32_e64 s[6:7], s8, v13
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
+; CGP-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s[6:7]
+; CGP-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v6, v11, s[6:7]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v9, v0, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s[4:5]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result
 }
@@ -581,34 +689,35 @@ define i32 @v_sdiv_i32_pow2_shl_denom(i32 %x, i32 %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v1
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v1
-; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v1
+; CHECK-NEXT:    v_mul_hi_u32 v6, v4, v1
+; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
+; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; CHECK-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; CHECK-NEXT:    v_mul_hi_u32 v5, v5, v4
+; CHECK-NEXT:    v_add_i32_e64 v6, s[4:5], v4, v5
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v3
-; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
-; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v1
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
-; CHECK-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
-; CHECK-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; CHECK-NEXT:    v_mul_hi_u32 v4, v4, v0
+; CHECK-NEXT:    v_mul_lo_u32 v5, v4, v1
+; CHECK-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; CHECK-NEXT:    v_subrev_i32_e32 v7, vcc, 1, v4
+; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
+; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
 ; CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v2
+; CHECK-NEXT:    v_cndmask_b32_e64 v0, v4, v6, s[4:5]
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; CHECK-NEXT:    v_xor_b32_e32 v1, v2, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
@@ -627,49 +736,51 @@ define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_lshl_b32_e32 v2, s4, v2
 ; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
-; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
-; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
-; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_xor_b32_e32 v4, v4, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v5, v5, v7
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
+; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
 ; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
 ; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
-; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
-; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
-; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
-; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
-; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
-; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
-; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
+; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
+; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
+; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
+; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v6
+; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v6
+; GISEL-NEXT:    v_mul_lo_u32 v11, v7, v3
+; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v7
+; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v7
 ; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
@@ -677,15 +788,15 @@ define <2 x i32> @v_sdiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v6, v9, s[6:7]
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v7, v12, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32_pow2_shl_denom:
@@ -794,34 +905,35 @@ define i32 @v_sdiv_i32_24bit(i32 %num, i32 %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT:    v_mul_hi_u32 v5, v3, v1
-; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v1
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
+; GISEL-NEXT:    v_mul_hi_u32 v6, v4, v1
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, 0, v5
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v6
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v4
+; GISEL-NEXT:    v_add_i32_e64 v6, s[4:5], v4, v5
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[4:5], v4, v5
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v3
-; GISEL-NEXT:    v_add_i32_e64 v5, s[4:5], v3, v4
-; GISEL-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
-; GISEL-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v3, v3, v0
-; GISEL-NEXT:    v_mul_lo_u32 v4, v3, v1
-; GISEL-NEXT:    v_add_i32_e32 v5, vcc, 1, v3
-; GISEL-NEXT:    v_subrev_i32_e32 v6, vcc, 1, v3
-; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v7, v1
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
+; GISEL-NEXT:    v_mul_lo_u32 v5, v4, v1
+; GISEL-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
+; GISEL-NEXT:    v_subrev_i32_e32 v7, vcc, 1, v4
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v5
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v8, v1
 ; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v3, v5, s[4:5]
-; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
-; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v6, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; GISEL-NEXT:    v_xor_b32_e32 v1, v2, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
@@ -883,17 +995,19 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v6, v4, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GISEL-NEXT:    v_xor_b32_e32 v7, v5, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v8, v4, v5
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v9, v6, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
@@ -902,48 +1016,48 @@ define <2 x i32> @v_sdiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v4, v2
+; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
+; GISEL-NEXT:    v_mul_hi_u32 v7, v4, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v5, v3
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v5, v3
-; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v4
-; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v5
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v8
-; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v8
-; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v9
-; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v10, v13, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v4
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v5
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v4, v6
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v5, v7
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
 ; GISEL-NEXT:    v_cndmask_b32_e32 v4, v4, v10, vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v5, v5, v6, s[4:5]
 ; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v1
-; GISEL-NEXT:    v_mul_lo_u32 v8, v4, v2
-; GISEL-NEXT:    v_add_i32_e32 v9, vcc, 1, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v4, v2
+; GISEL-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
 ; GISEL-NEXT:    v_subrev_i32_e32 v10, vcc, 1, v4
 ; GISEL-NEXT:    v_mul_lo_u32 v11, v5, v3
 ; GISEL-NEXT:    v_add_i32_e32 v12, vcc, 1, v5
 ; GISEL-NEXT:    v_subrev_i32_e32 v13, vcc, 1, v5
-; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v8
-; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v14, vcc, v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v6
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[4:5], v1, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v1, v11
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v14, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v0, v3
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], vcc
-; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v9, s[6:7]
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v4, v7, s[6:7]
 ; GISEL-NEXT:    s_and_b64 s[6:7], s[8:9], s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v12, s[6:7]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v10, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v13, v1, s[4:5]
-; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v6
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v7
-; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v7
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v8
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v9
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_sdiv_v2i32_24bit:

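For reference, here is a minimal standalone sketch (plain C++, not LLVM code) of the sign-handling pattern the updated sdiv checks verify: each operand gets its own ashr-by-31 sign mask (note the second ashr now reads the RHS register in the expected output above), the division runs unsigned, and the quotient sign is the xor of the two masks. The helper name sdiv_expanded and the test values are illustrative only, assumptions made for this sketch.

#include <cassert>
#include <cstdint>

static int32_t sdiv_expanded(int32_t lhs, int32_t rhs) {
  // Sign masks: all ones for a negative operand, zero otherwise.
  // (Right-shifting a negative int is arithmetic on mainstream targets.)
  int32_t LHSign = lhs >> 31;
  int32_t RHSign = rhs >> 31;  // each mask comes from its own operand
  // abs(x) computed as (x + mask) ^ mask, then divide unsigned.
  uint32_t UL = (uint32_t)((lhs + LHSign) ^ LHSign);
  uint32_t UR = (uint32_t)((rhs + RHSign) ^ RHSign);
  uint32_t UQ = UL / UR;
  // The quotient is negative iff the operand signs differ.
  int32_t Sign = LHSign ^ RHSign;
  return ((int32_t)UQ ^ Sign) - Sign;
}

int main() {
  assert(sdiv_expanded(-7, 2) == -3);
  assert(sdiv_expanded(7, -2) == -3);  // would be 0 if RHSign were lhs >> 31
  assert(sdiv_expanded(-7, -2) == 3);
  return 0;
}

The second assert is the interesting one: taking both masks from the LHS leaves a negative RHS unconverted, so the unsigned divide sees a huge denominator and returns 0.
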
diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 62939b5e59e7..c53d5627d9fe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -9,10 +9,11 @@ define i32 @v_srem_i32(i32 %num, i32 %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
@@ -97,16 +98,17 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_srem_i32:
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_ashr_i32 s4, s0, 31
+; GISEL-NEXT:    s_ashr_i32 s2, s1, 31
 ; GISEL-NEXT:    s_add_i32 s0, s0, s4
-; GISEL-NEXT:    s_add_i32 s1, s1, s4
-; GISEL-NEXT:    s_xor_b32 s2, s0, s4
-; GISEL-NEXT:    s_xor_b32 s3, s1, s4
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GISEL-NEXT:    s_add_i32 s1, s1, s2
+; GISEL-NEXT:    s_xor_b32 s3, s0, s4
+; GISEL-NEXT:    s_xor_b32 s2, s1, s2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v0, s2
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GISEL-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s3
-; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s3
+; GISEL-NEXT:    v_mul_lo_u32 v1, v0, s2
+; GISEL-NEXT:    v_mul_hi_u32 v2, v0, s2
 ; GISEL-NEXT:    v_sub_i32_e32 v3, vcc, 0, v1
 ; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v2
 ; GISEL-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
@@ -114,13 +116,13 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], v0, v1
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[0:1], v0, v1
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s2
-; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s3
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s2, v0
-; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
-; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], s3, v1
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[0:1], s2, v0
-; GISEL-NEXT:    v_subrev_i32_e64 v0, s[2:3], s3, v1
+; GISEL-NEXT:    v_mul_hi_u32 v0, v0, s3
+; GISEL-NEXT:    v_mul_lo_u32 v0, v0, s2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, s3, v0
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s2, v1
+; GISEL-NEXT:    v_add_i32_e64 v2, s[0:1], s2, v1
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[0:1], s3, v0
+; GISEL-NEXT:    v_subrev_i32_e64 v0, s[2:3], s2, v1
 ; GISEL-NEXT:    s_and_b64 vcc, vcc, s[0:1]
 ; GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[0:1]
@@ -188,25 +190,27 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL:       ; %bb.0:
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
 ; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
 ; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
@@ -215,23 +219,23 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v5, v8
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v8
 ; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
@@ -244,9 +248,9 @@ define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32:
@@ -345,17 +349,17 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 ; CHECK-LABEL: v_srem_i32_pow2k_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_movk_i32 s6, 0x1000
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0x1000
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, s6
 ; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
@@ -364,9 +368,9 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 ; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 12, v3
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v2
@@ -381,72 +385,128 @@ define i32 @v_srem_i32_pow2k_denom(i32 %num) {
 }
 
 define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_srem_v2i32_pow2k_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s4, 0x1000
-; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x1000
-; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v5
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CHECK-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
-; CHECK-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
-; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
-; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v3
-; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
-; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v6
-; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v7
-; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
-; CHECK-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
-; CHECK-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v0
-; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v1
-; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v3
-; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
-; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
-; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v5
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v5
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
-; CHECK-NEXT:    v_add_i32_e64 v5, s[8:9], v9, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
-; CHECK-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
-; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
-; CHECK-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    s_add_i32 s10, 0x1000, 0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s10
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, s10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, s10
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v11, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v6
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v4, v7
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s10
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s10
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v5
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v4
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v6
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], s10, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; GISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], s10, v6
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v7
+; GISEL-NEXT:    v_add_i32_e64 v5, s[8:9], s10, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v4
+; GISEL-NEXT:    v_subrev_i32_e64 v1, s[10:11], s10, v7
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GISEL-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32_pow2k_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_movk_i32 s4, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x1000
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s4
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_lshlrev_b32_e32 v7, 12, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v5, s4
+; CGP-NEXT:    v_lshlrev_b32_e32 v9, 12, v6
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v7
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v9
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v9, v12, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v5, v7
+; CGP-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v6, v8
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v1
+; CGP-NEXT:    v_lshlrev_b32_e32 v5, 12, v5
+; CGP-NEXT:    v_lshlrev_b32_e32 v6, 12, v6
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v5
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v1, v6
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v7, v3
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v7, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v7, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[8:9], v8, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v6
+; CGP-NEXT:    v_sub_i32_e64 v1, s[10:11], v8, v3
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v9, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 4096, i32 4096>
   ret <2 x i32> %result
 }
@@ -455,17 +515,17 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 ; CHECK-LABEL: v_srem_i32_oddk_denom:
 ; CHECK:       ; %bb.0:
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_mov_b32 s6, 0x12d8fb
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x12d8fb, v1
+; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, s6
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v1
-; CHECK-NEXT:    v_xor_b32_e32 v2, v2, v1
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; CHECK-NEXT:    v_mul_lo_u32 v4, v3, v2
-; CHECK-NEXT:    v_mul_hi_u32 v5, v3, v2
+; CHECK-NEXT:    v_mul_lo_u32 v4, v3, s6
+; CHECK-NEXT:    v_mul_hi_u32 v5, v3, s6
 ; CHECK-NEXT:    v_sub_i32_e32 v6, vcc, 0, v4
 ; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
 ; CHECK-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
@@ -474,9 +534,9 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 ; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v3, v4
 ; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; CHECK-NEXT:    v_mul_hi_u32 v3, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v3, v3, v2
+; CHECK-NEXT:    v_mul_lo_u32 v3, v3, s6
 ; CHECK-NEXT:    v_sub_i32_e32 v4, vcc, v0, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v2
+; CHECK-NEXT:    v_cmp_le_u32_e32 vcc, s6, v4
 ; CHECK-NEXT:    v_add_i32_e64 v5, s[4:5], v4, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v3
 ; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v4, v2
@@ -491,72 +551,128 @@ define i32 @v_srem_i32_oddk_denom(i32 %num) {
 }
 
 define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
-; CHECK-LABEL: v_srem_v2i32_oddk_denom:
-; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_mov_b32 s4, 0x12d8fb
-; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
-; CHECK-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
-; CHECK-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
-; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v5, vcc, s4, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v5, v5, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_xor_b32_e32 v3, v3, v4
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v5
-; CHECK-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; CHECK-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
-; CHECK-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
-; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; CHECK-NEXT:    v_mul_lo_u32 v8, v6, v5
-; CHECK-NEXT:    v_mul_hi_u32 v9, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v10, v7, v3
-; CHECK-NEXT:    v_mul_hi_u32 v11, v7, v3
-; CHECK-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
-; CHECK-NEXT:    v_sub_i32_e32 v13, vcc, 0, v10
-; CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
-; CHECK-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v8, v8, v6
-; CHECK-NEXT:    v_mul_hi_u32 v9, v9, v7
-; CHECK-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
-; CHECK-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
-; CHECK-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
-; CHECK-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
-; CHECK-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; CHECK-NEXT:    v_mul_hi_u32 v6, v6, v0
-; CHECK-NEXT:    v_mul_hi_u32 v7, v7, v1
-; CHECK-NEXT:    v_mul_lo_u32 v6, v6, v5
-; CHECK-NEXT:    v_mul_lo_u32 v7, v7, v3
-; CHECK-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
-; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v5
-; CHECK-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v5
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
-; CHECK-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v5
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
-; CHECK-NEXT:    v_add_i32_e64 v5, s[8:9], v9, v3
-; CHECK-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v7
-; CHECK-NEXT:    v_sub_i32_e64 v1, s[10:11], v9, v3
-; CHECK-NEXT:    s_and_b64 vcc, vcc, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v8, v0, vcc
-; CHECK-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v9, v1, vcc
-; CHECK-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
-; CHECK-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
-; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v4
-; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-LABEL: v_srem_v2i32_oddk_denom:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    s_add_i32 s10, 0x12d8fb, 0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
+; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v4, s10
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v4, v4
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v4
+; GISEL-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GISEL-NEXT:    v_mul_lo_u32 v6, v5, s10
+; GISEL-NEXT:    v_mul_hi_u32 v7, v5, s10
+; GISEL-NEXT:    v_mul_lo_u32 v8, v4, s10
+; GISEL-NEXT:    v_mul_hi_u32 v9, v4, s10
+; GISEL-NEXT:    v_sub_i32_e32 v10, vcc, 0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v11, vcc, 0, v8
+; GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v9
+; GISEL-NEXT:    v_cndmask_b32_e64 v7, v8, v11, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v5
+; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v4
+; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v5, v6
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v6
+; GISEL-NEXT:    v_add_i32_e64 v6, s[6:7], v4, v7
+; GISEL-NEXT:    v_sub_i32_e64 v4, s[6:7], v4, v7
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v4, v4, v6, s[4:5]
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
+; GISEL-NEXT:    v_mul_hi_u32 v4, v4, v1
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, s10
+; GISEL-NEXT:    v_mul_lo_u32 v4, v4, s10
+; GISEL-NEXT:    v_sub_i32_e32 v6, vcc, v0, v5
+; GISEL-NEXT:    v_sub_i32_e32 v7, vcc, v1, v4
+; GISEL-NEXT:    v_cmp_le_u32_e32 vcc, s10, v6
+; GISEL-NEXT:    v_add_i32_e64 v8, s[4:5], s10, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; GISEL-NEXT:    v_subrev_i32_e64 v0, s[6:7], s10, v6
+; GISEL-NEXT:    v_cmp_le_u32_e64 s[6:7], s10, v7
+; GISEL-NEXT:    v_add_i32_e64 v5, s[8:9], s10, v7
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v4
+; GISEL-NEXT:    v_subrev_i32_e64 v1, s[10:11], s10, v7
+; GISEL-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v6, v0, vcc
+; GISEL-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v7, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; GISEL-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; CGP-LABEL: v_srem_v2i32_oddk_denom:
+; CGP:       ; %bb.0:
+; CGP-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CGP-NEXT:    s_mov_b32 s8, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CGP-NEXT:    v_mov_b32_e32 v3, 0x12d8fb
+; CGP-NEXT:    v_ashrrev_i32_e32 v4, 31, v1
+; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_cvt_f32_u32_e32 v5, s8
+; CGP-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
+; CGP-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v5
+; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; CGP-NEXT:    v_mul_lo_u32 v7, v5, s8
+; CGP-NEXT:    v_mul_hi_u32 v8, v5, s8
+; CGP-NEXT:    v_mul_lo_u32 v9, v6, v3
+; CGP-NEXT:    v_mul_hi_u32 v10, v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v7
+; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v9
+; CGP-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v7, v7, v11, vcc
+; CGP-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v10
+; CGP-NEXT:    v_cndmask_b32_e64 v8, v9, v12, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v7, v7, v5
+; CGP-NEXT:    v_mul_hi_u32 v8, v8, v6
+; CGP-NEXT:    v_add_i32_e64 v9, s[6:7], v5, v7
+; CGP-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v7
+; CGP-NEXT:    v_add_i32_e64 v7, s[6:7], v6, v8
+; CGP-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; CGP-NEXT:    v_cndmask_b32_e32 v5, v5, v9, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v6, v6, v7, s[4:5]
+; CGP-NEXT:    v_mul_hi_u32 v5, v5, v0
+; CGP-NEXT:    v_mul_hi_u32 v6, v6, v1
+; CGP-NEXT:    v_mul_lo_u32 v5, v5, s8
+; CGP-NEXT:    v_mul_lo_u32 v6, v6, v3
+; CGP-NEXT:    v_sub_i32_e32 v7, vcc, v0, v5
+; CGP-NEXT:    v_sub_i32_e32 v8, vcc, v1, v6
+; CGP-NEXT:    v_cmp_le_u32_e32 vcc, s8, v7
+; CGP-NEXT:    v_add_i32_e64 v9, s[4:5], v7, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
+; CGP-NEXT:    v_sub_i32_e64 v0, s[6:7], v7, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v3
+; CGP-NEXT:    v_add_i32_e64 v5, s[8:9], v8, v3
+; CGP-NEXT:    v_cmp_ge_u32_e64 s[8:9], v1, v6
+; CGP-NEXT:    v_sub_i32_e64 v1, s[10:11], v8, v3
+; CGP-NEXT:    s_and_b64 vcc, vcc, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v7, v0, vcc
+; CGP-NEXT:    s_and_b64 vcc, s[6:7], s[8:9]
+; CGP-NEXT:    v_cndmask_b32_e32 v1, v8, v1, vcc
+; CGP-NEXT:    v_cndmask_b32_e64 v0, v9, v0, s[4:5]
+; CGP-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[8:9]
+; CGP-NEXT:    v_xor_b32_e32 v0, v0, v2
+; CGP-NEXT:    v_xor_b32_e32 v1, v1, v4
+; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
+; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
+; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
   ret <2 x i32> %result
 }
@@ -567,10 +683,11 @@ define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
 ; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_lshl_b32_e32 v1, 0x1000, v1
 ; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; CHECK-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; CHECK-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
-; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
+; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
@@ -613,12 +730,14 @@ define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
 ; GISEL-NEXT:    v_lshl_b32_e32 v3, s4, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
 ; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v2
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
@@ -775,10 +894,11 @@ define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GISEL-NEXT:    v_and_b32_e32 v1, s4, v1
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v2, 31, v0
+; GISEL-NEXT:    v_ashrrev_i32_e32 v3, 31, v1
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v2
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v2
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v3
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GISEL-NEXT:    v_mul_f32_e32 v3, 0x4f800000, v3
@@ -863,25 +983,27 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_and_b32_e32 v2, s4, v2
 ; GISEL-NEXT:    v_and_b32_e32 v3, s4, v3
 ; GISEL-NEXT:    v_ashrrev_i32_e32 v4, 31, v0
-; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v5, 31, v2
+; GISEL-NEXT:    v_ashrrev_i32_e32 v6, 31, v1
+; GISEL-NEXT:    v_ashrrev_i32_e32 v7, 31, v3
 ; GISEL-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
-; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GISEL-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
+; GISEL-NEXT:    v_add_i32_e32 v1, vcc, v1, v6
+; GISEL-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
-; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v5
-; GISEL-NEXT:    v_cvt_f32_u32_e32 v6, v2
+; GISEL-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
+; GISEL-NEXT:    v_xor_b32_e32 v3, v3, v7
+; GISEL-NEXT:    v_cvt_f32_u32_e32 v5, v2
 ; GISEL-NEXT:    v_cvt_f32_u32_e32 v7, v3
-; GISEL-NEXT:    v_rcp_iflag_f32_e32 v6, v6
+; GISEL-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GISEL-NEXT:    v_rcp_iflag_f32_e32 v7, v7
-; GISEL-NEXT:    v_mul_f32_e32 v6, 0x4f800000, v6
+; GISEL-NEXT:    v_mul_f32_e32 v5, 0x4f800000, v5
 ; GISEL-NEXT:    v_mul_f32_e32 v7, 0x4f800000, v7
-; GISEL-NEXT:    v_cvt_u32_f32_e32 v6, v6
+; GISEL-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GISEL-NEXT:    v_cvt_u32_f32_e32 v7, v7
-; GISEL-NEXT:    v_mul_lo_u32 v8, v6, v2
-; GISEL-NEXT:    v_mul_hi_u32 v9, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v8, v5, v2
+; GISEL-NEXT:    v_mul_hi_u32 v9, v5, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v10, v7, v3
 ; GISEL-NEXT:    v_mul_hi_u32 v11, v7, v3
 ; GISEL-NEXT:    v_sub_i32_e32 v12, vcc, 0, v8
@@ -890,23 +1012,23 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e32 v8, v8, v12, vcc
 ; GISEL-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v11
 ; GISEL-NEXT:    v_cndmask_b32_e64 v9, v10, v13, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v6
+; GISEL-NEXT:    v_mul_hi_u32 v8, v8, v5
 ; GISEL-NEXT:    v_mul_hi_u32 v9, v9, v7
-; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v6, v8
-; GISEL-NEXT:    v_sub_i32_e64 v6, s[6:7], v6, v8
+; GISEL-NEXT:    v_add_i32_e64 v10, s[6:7], v5, v8
+; GISEL-NEXT:    v_sub_i32_e64 v5, s[6:7], v5, v8
 ; GISEL-NEXT:    v_add_i32_e64 v8, s[6:7], v7, v9
 ; GISEL-NEXT:    v_sub_i32_e64 v7, s[6:7], v7, v9
-; GISEL-NEXT:    v_cndmask_b32_e32 v6, v6, v10, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v5, v5, v10, vcc
 ; GISEL-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s[4:5]
-; GISEL-NEXT:    v_mul_hi_u32 v6, v6, v0
+; GISEL-NEXT:    v_mul_hi_u32 v5, v5, v0
 ; GISEL-NEXT:    v_mul_hi_u32 v7, v7, v1
-; GISEL-NEXT:    v_mul_lo_u32 v6, v6, v2
+; GISEL-NEXT:    v_mul_lo_u32 v5, v5, v2
 ; GISEL-NEXT:    v_mul_lo_u32 v7, v7, v3
-; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v6
+; GISEL-NEXT:    v_sub_i32_e32 v8, vcc, v0, v5
 ; GISEL-NEXT:    v_sub_i32_e32 v9, vcc, v1, v7
 ; GISEL-NEXT:    v_cmp_ge_u32_e32 vcc, v8, v2
 ; GISEL-NEXT:    v_add_i32_e64 v10, s[4:5], v8, v2
-; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v6
+; GISEL-NEXT:    v_cmp_ge_u32_e64 s[4:5], v0, v5
 ; GISEL-NEXT:    v_sub_i32_e64 v0, s[6:7], v8, v2
 ; GISEL-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
 ; GISEL-NEXT:    v_add_i32_e64 v2, s[8:9], v9, v3
@@ -919,9 +1041,9 @@ define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
 ; GISEL-NEXT:    v_cndmask_b32_e64 v0, v10, v0, s[4:5]
 ; GISEL-NEXT:    v_cndmask_b32_e64 v1, v2, v1, s[8:9]
 ; GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
-; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GISEL-NEXT:    v_xor_b32_e32 v1, v1, v6
 ; GISEL-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
-; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
+; GISEL-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; CGP-LABEL: v_srem_v2i32_24bit:

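
Similarly, a hedged C++ sketch of the srem variant exercised by the checks above: the operands are made unsigned the same way, but the remainder is fixed up with the numerator's sign mask alone, which is why the final xor/sub pairs in the srem diffs use only the LHS sign register. srem_expanded is an illustrative name, not an LLVM symbol.

#include <cassert>
#include <cstdint>

static int32_t srem_expanded(int32_t lhs, int32_t rhs) {
  int32_t LHSign = lhs >> 31;
  int32_t RHSign = rhs >> 31;
  uint32_t UL = (uint32_t)((lhs + LHSign) ^ LHSign);
  uint32_t UR = (uint32_t)((rhs + RHSign) ^ RHSign);
  uint32_t URem = UL % UR;
  // The remainder takes the numerator's sign only; the denominator's
  // mask is needed just to make the unsigned modulo well-formed.
  return ((int32_t)URem ^ LHSign) - LHSign;
}

int main() {
  assert(srem_expanded(-7, 2) == -1);
  assert(srem_expanded(7, -2) == 1);
  assert(srem_expanded(-7, -2) == -1);
  return 0;
}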