[llvm] c97436f - [AMDGPU] Use null for dead sdst operand

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 13 14:41:54 PDT 2022


Author: Stanislav Mekhanoshin
Date: 2022-06-13T14:41:40-07:00
New Revision: c97436f8b6e2718286e8496faf53a2c800e281cf

URL: https://github.com/llvm/llvm-project/commit/c97436f8b6e2718286e8496faf53a2c800e281cf
DIFF: https://github.com/llvm/llvm-project/commit/c97436f8b6e2718286e8496faf53a2c800e281cf.diff

LOG: [AMDGPU] Use null for dead sdst operand

Differential Revision: https://reviews.llvm.org/D127542

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
    llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
    llvm/test/CodeGen/AMDGPU/carryout-selection.ll
    llvm/test/CodeGen/AMDGPU/frem.ll
    llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
    llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
    llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
    llvm/test/CodeGen/AMDGPU/mad_64_32.ll
    llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
    llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
    llvm/test/CodeGen/AMDGPU/offset-split-global.ll
    llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
    llvm/test/CodeGen/AMDGPU/saddo.ll
    llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
    llvm/test/CodeGen/AMDGPU/udiv.ll
    llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
    llvm/test/CodeGen/AMDGPU/wave32.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index fd1e57b1e951..57ed89278b2d 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -48,6 +48,7 @@ class SIShrinkInstructions : public MachineFunctionPass {
   void shrinkMIMG(MachineInstr &MI) const;
   void shrinkMadFma(MachineInstr &MI) const;
   bool shrinkScalarLogicOp(MachineInstr &MI) const;
+  bool tryReplaceDeadSDST(MachineInstr &MI) const;
   bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
                      Register Reg, unsigned SubReg) const;
   bool instReadsReg(const MachineInstr *MI, unsigned Reg,
@@ -689,6 +690,22 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
   return nullptr;
 }
 
+// If an instruction has a dead sdst, replace it with NULL on gfx10+.
+bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
+  if (ST->getGeneration() < AMDGPUSubtarget::GFX10)
+    return false;
+
+  MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
+  if (!Op)
+    return false;
+  Register SDstReg = Op->getReg();
+  if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
+    return false;
+
+  Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
+  return true;
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -822,15 +839,21 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
+      if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
+        // If there is no chance we will shrink it and use VCC as sdst to get
+        // a 32-bit form, try to replace the dead sdst with NULL.
+        tryReplaceDeadSDST(MI);
         continue;
+      }
 
       if (!TII->canShrink(MI, *MRI)) {
         // Try commuting the instruction and see if that enables us to shrink
         // it.
         if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
-            !TII->canShrink(MI, *MRI))
+            !TII->canShrink(MI, *MRI)) {
+          tryReplaceDeadSDST(MI);
           continue;
+        }
       }
 
       int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
index 5f83beeceaad..af5a1a21f6ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
@@ -220,7 +220,7 @@ define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1)
 ;
 ; GFX10-LABEL: div_scale_s_s_true:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s2, s3, s2
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s2, s3, s2
 ; GFX10-NEXT:    ; return to shader part epilog
   %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true)
   %result = extractvalue { float, i1 } %div.scale, 0
@@ -236,7 +236,7 @@ define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1
 ;
 ; GFX10-LABEL: div_scale_s_s_false:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s3, s3, s2
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s3, s3, s2
 ; GFX10-NEXT:    ; return to shader part epilog
   %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false)
   %result = extractvalue { float, i1 } %div.scale, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 9e1dc06de933..a7ec5a1279b4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -84,7 +84,7 @@ define float @v_fdiv_f32(float %a, float %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-IEEE-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
@@ -101,7 +101,7 @@ define float @v_fdiv_f32(float %a, float %b) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, v0, v1, v0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -188,7 +188,7 @@ define float @v_fdiv_f32_ulp25(float %a, float %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-IEEE-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
@@ -289,7 +289,7 @@ define float @v_rcp_f32(float %x) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX10-IEEE-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
@@ -306,7 +306,7 @@ define float @v_rcp_f32(float %x) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -397,7 +397,7 @@ define float @v_rcp_f32_arcp(float %x) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX10-IEEE-NEXT:    v_fma_f32 v3, -v1, v2, 1.0
@@ -414,7 +414,7 @@ define float @v_rcp_f32_arcp(float %x) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, s4, v0, v0, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v1, null, v0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v3, vcc_lo, 1.0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v2, v1
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -560,7 +560,7 @@ define float @v_fdiv_f32_arcp_ulp25(float %a, float %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v1, v1, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, null, v1, v1, v0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v1, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-IEEE-NEXT:    v_fma_f32 v4, -v2, v3, 1.0
@@ -710,8 +710,8 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, s4, v2, v2, v0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, s4, v3, v3, v1
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, null, v2, v2, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, null, v3, v3, v1
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v10, vcc_lo, v0, v2, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v6, v4
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v7, v5
@@ -739,7 +739,7 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, s4, v2, v2, v0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, null, v2, v2, v0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v6, vcc_lo, v0, v2, v0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -750,7 +750,7 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) {
 ; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v7, v8, v5
 ; GFX10-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v6, s4, v3, v3, v1
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v6, null, v3, v3, v1
 ; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v5, v6
 ; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
@@ -872,8 +872,8 @@ define <2 x float> @v_fdiv_v2f32_ulp25(<2 x float> %a, <2 x float> %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, s4, v2, v2, v0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, s4, v3, v3, v1
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, null, v2, v2, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, null, v3, v3, v1
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v10, vcc_lo, v0, v2, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v6, v4
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v7, v5
@@ -1040,8 +1040,8 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v0, v0, 1.0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, s4, v1, v1, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, null, v0, v0, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, null, v1, v1, 1.0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v5, v3
@@ -1069,7 +1069,7 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v0, v0, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, null, v0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -1080,7 +1080,7 @@ define <2 x float> @v_rcp_v2f32(<2 x float> %x) {
 ; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, s4, v1, v1, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, null, v1, v1, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v4
 ; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
@@ -1222,8 +1222,8 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, s4, v0, v0, 1.0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, s4, v1, v1, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v2, null, v0, v0, 1.0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v3, null, v1, v1, 1.0
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v8, vcc_lo, 1.0, v0, 1.0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v5, v3
@@ -1251,7 +1251,7 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) {
 ; GFX10-FLUSH:       ; %bb.0:
 ; GFX10-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-FLUSH-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, s4, v0, v0, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v2, null, v0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, vcc_lo, 1.0, v0, 1.0
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 3
@@ -1262,7 +1262,7 @@ define <2 x float> @v_rcp_v2f32_arcp(<2 x float> %x) {
 ; GFX10-FLUSH-NEXT:    v_fmac_f32_e32 v5, v6, v3
 ; GFX10-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
 ; GFX10-FLUSH-NEXT:    s_denorm_mode 0
-; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, s4, v1, v1, 1.0
+; GFX10-FLUSH-NEXT:    v_div_scale_f32 v4, null, v1, v1, 1.0
 ; GFX10-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
 ; GFX10-FLUSH-NEXT:    v_rcp_f32_e32 v3, v4
 ; GFX10-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
@@ -1459,8 +1459,8 @@ define <2 x float> @v_fdiv_v2f32_arcp_ulp25(<2 x float> %a, <2 x float> %b) {
 ; GFX10-IEEE:       ; %bb.0:
 ; GFX10-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-IEEE-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, s4, v2, v2, v0
-; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, s4, v3, v3, v1
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v4, null, v2, v2, v0
+; GFX10-IEEE-NEXT:    v_div_scale_f32 v5, null, v3, v3, v1
 ; GFX10-IEEE-NEXT:    v_div_scale_f32 v10, vcc_lo, v0, v2, v0
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v6, v4
 ; GFX10-IEEE-NEXT:    v_rcp_f32_e32 v7, v5

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index 39153cb8a744..765fdeb16eb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -67,7 +67,7 @@ define double @v_fdiv_f64(double %a, double %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
 ; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
@@ -170,7 +170,7 @@ define double @v_fdiv_f64_ulp25(double %a, double %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
 ; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
@@ -243,7 +243,7 @@ define double @v_rcp_f64(double %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -316,7 +316,7 @@ define double @v_rcp_f64_arcp(double %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -420,7 +420,7 @@ define double @v_rcp_f64_ulp25(double %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[2:3], null, v[0:1], v[0:1], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
 ; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
@@ -523,7 +523,7 @@ define double @v_fdiv_f64_arcp_ulp25(double %a, double %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
 ; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
@@ -634,8 +634,8 @@ define <2 x double> @v_fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
-; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
 ; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
@@ -804,8 +804,8 @@ define <2 x double> @v_fdiv_v2f64_ulp25(<2 x double> %a, <2 x double> %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
-; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
 ; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
@@ -928,8 +928,8 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
-; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
@@ -1052,8 +1052,8 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
-; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
@@ -1223,8 +1223,8 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
-; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[0:1], v[0:1], 1.0
+; GFX10-NEXT:    v_div_scale_f64 v[6:7], null, v[2:3], v[2:3], 1.0
 ; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
 ; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
@@ -1393,8 +1393,8 @@ define <2 x double> @v_fdiv_v2f64_arcp_ulp25(<2 x double> %a, <2 x double> %b) {
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
-; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[8:9], null, v[4:5], v[4:5], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[10:11], null, v[6:7], v[6:7], v[2:3]
 ; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
index ef3d6f6b479e..c7e8b66ace82 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
@@ -54,7 +54,7 @@ define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, v2, v2, v1
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v2, v2, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -121,7 +121,7 @@ define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, v1, v2, v1
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v1, v2, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -193,7 +193,7 @@ define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, doubl
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s2, v[2:3], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, v[2:3], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -265,7 +265,7 @@ define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, doubl
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s2, v[0:1], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, v[0:1], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -328,7 +328,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, v0, v0, s0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v0, v0, s0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -388,7 +388,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s0, v0, s0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s0, v0, s0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -448,7 +448,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s0, s0, v0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s0, s0, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -508,7 +508,7 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, v0, s0, v0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v0, s0, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -568,7 +568,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s0, v[0:1], v[0:1], s[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, v[0:1], v[0:1], s[0:1]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -628,7 +628,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s0, s[0:1], v[0:1], s[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, s[0:1], v[0:1], s[0:1]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -688,7 +688,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s0, s[0:1], s[0:1], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], v[0:1]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -748,7 +748,7 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v0, s[6:7]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s0, v[0:1], s[0:1], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, v[0:1], s[0:1], v[0:1]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -797,7 +797,7 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s5, s5, s4
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s5, s5, s4
 ; GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10-NEXT:    s_endpgm
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false)
@@ -841,7 +841,7 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s0, s4, s5, s4
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s4, s5, s4
 ; GFX10-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10-NEXT:    s_endpgm
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true)
@@ -887,7 +887,7 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)*
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s2, s[4:5], s[4:5], s[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, s[4:5], s[4:5], s[2:3]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false)
@@ -933,7 +933,7 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)*
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s2, s[2:3], s[4:5], s[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, s[2:3], s[4:5], s[2:3]
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true)
@@ -985,7 +985,7 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, v0, v0, 1.0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v0, v0, 1.0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1041,7 +1041,7 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)*
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v0, v0, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, 2.0, 2.0, v0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, 2.0, 2.0, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1108,7 +1108,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out,
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, v2, v2, v0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v2, v2, v0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1179,7 +1179,7 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out,
 ; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:4 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v2
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, v0, v0, v1
+; GFX10-NEXT:    v_div_scale_f32 v0, null, v0, v0, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
@@ -1226,7 +1226,7 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)*
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, s0, s0, 0x41000000
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s0, s0, 0x41000000
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false)
@@ -1263,7 +1263,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)*
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, 0x41000000, 0x41000000, s0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, 0x41000000, 0x41000000, s0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false)
@@ -1298,7 +1298,7 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v0, s2, s0, s0, s0
+; GFX10-NEXT:    v_div_scale_f32 v0, null, s0, s0, s0
 ; GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false)
@@ -1337,7 +1337,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)*
 ; GFX10-NEXT:    s_mov_b32 s2, 0
 ; GFX10-NEXT:    s_mov_b32 s3, 0x40200000
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
-; GFX10-NEXT:    v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[0:1], null, s[0:1], s[0:1], s[2:3]
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
index 45c81b901182..7fab7f64e20c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
@@ -384,7 +384,7 @@ define i64 @v_mul_i64(i64 %num, i64 %den) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v5, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v4, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v4, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v5, v2
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v3, v2
@@ -514,10 +514,10 @@ define i96 @v_mul_i96(i96 %num, i96 %den) {
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v6, v5
 ; GFX10-NEXT:    v_mul_lo_u32 v8, v7, v4
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v6, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v3, 0
 ; GFX10-NEXT:    v_add3_u32 v2, v5, v8, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s4, v6, v4, v[1:2]
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s4, v7, v3, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v4, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], null, v7, v3, v[1:2]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %result = mul i96 %num, %den
   ret i96 %result
@@ -769,12 +769,12 @@ define i128 @v_mul_i128(i128 %num, i128 %den) {
 ; GFX10-NEXT:    v_mov_b32_e32 v9, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v10, v2
 ; GFX10-NEXT:    v_mul_lo_u32 v3, v3, v4
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v8, v6, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v8, v6, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v8, v7
 ; GFX10-NEXT:    v_mul_lo_u32 v6, v9, v6
-; GFX10-NEXT:    v_mad_u64_u32 v[11:12], s4, v9, v5, v[0:1]
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v8, v4, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[11:12], s4, v10, v4, v[11:12]
+; GFX10-NEXT:    v_mad_u64_u32 v[11:12], null, v9, v5, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v8, v4, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[11:12], null, v10, v4, v[11:12]
 ; GFX10-NEXT:    v_mov_b32_e32 v2, v11
 ; GFX10-NEXT:    v_mad_u64_u32 v[1:2], vcc_lo, v8, v5, v[1:2]
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v10, v5
@@ -1813,24 +1813,24 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
 ; GFX10-NEXT:    v_mul_lo_u32 v27, v6, v9
 ; GFX10-NEXT:    v_mul_lo_u32 v28, v5, v10
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v7, v8
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v16, v14, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[18:19], s4, v16, v12, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v16, v14, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[18:19], null, v16, v12, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v30, v17, v14
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v17, v13, v[0:1]
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v2, v12, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v17, v13, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[18:19], s4, v17, v11, v[18:19]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v20, 0, 1, s4
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, v3, v11, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v2, v12, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[18:19], vcc_lo, v2, v10, v[18:19]
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[20:21], s4, v16, v10, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v4, v10, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[20:21], null, v16, v10, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v11, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[18:19], vcc_lo, v3, v9, v[18:19]
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v24, vcc_lo, 0, v22, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v5, v9, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v10, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[18:19], vcc_lo, v4, v8, v[18:19]
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v26, vcc_lo, 0, v24, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[22:23], s4, v6, v8, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v9, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[22:23], null, v6, v8, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v17, v9, v[20:21]
 ; GFX10-NEXT:    v_cndmask_b32_e64 v25, 0, 1, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v20, v22
@@ -1842,7 +1842,7 @@ define i256 @v_mul_i256(i256 %num, i256 %den) {
 ; GFX10-NEXT:    v_mul_lo_u32 v22, v16, v15
 ; GFX10-NEXT:    v_mad_u64_u32 v[24:25], vcc_lo, v17, v12, v[0:1]
 ; GFX10-NEXT:    v_mad_u64_u32 v[14:15], s6, v16, v11, v[19:20]
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s5, v16, v8, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v16, v8, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v20, v4, v11
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s6
 ; GFX10-NEXT:    v_mad_u64_u32 v[18:19], s5, v2, v11, v[24:25]

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
index 3b133078aa90..48bf3167bb50 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
@@ -488,7 +488,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s10, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s14, s10, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s10, v3, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s11, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v0
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v4, v5
@@ -514,7 +514,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_add3_u32 v1, v5, v4, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s11, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s14, s10, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s10, v3, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s10, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v0
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v5, v4
@@ -561,7 +561,7 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s9, v2
 ; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v2, 1
 ; GFX10-NEXT:    v_add3_u32 v3, v3, v0, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s10, s8, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v2, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s8, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v5, v4
@@ -1930,7 +1930,9 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s10
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    s_xor_b64 s[14:15], s[6:7], s[12:13]
+; GFX10-NEXT:    s_sub_u32 s3, 0, s10
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
+; GFX10-NEXT:    s_subb_u32 s6, 0, s11
 ; GFX10-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
@@ -1945,18 +1947,16 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_trunc_f32_e32 v4, v4
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v6, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v2, 0xcf800000, v4
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s3, s20, v6, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s20, v6, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v8, s21, v6
 ; GFX10-NEXT:    v_add_f32_e32 v2, v2, v3
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v4
-; GFX10-NEXT:    s_sub_u32 s3, 0, s10
-; GFX10-NEXT:    s_subb_u32 s6, 0, s11
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; GFX10-NEXT:    v_mul_lo_u32 v9, s3, v3
 ; GFX10-NEXT:    v_add3_u32 v7, v1, v7, v8
 ; GFX10-NEXT:    v_mul_lo_u32 v10, v5, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v11, v6, v0
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s7, s3, v4, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v4, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v8, s6, v4
 ; GFX10-NEXT:    v_mul_lo_u32 v12, v6, v7
 ; GFX10-NEXT:    v_mul_hi_u32 v0, v5, v0
@@ -1980,8 +1980,8 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, v12, v10
 ; GFX10-NEXT:    v_add_co_u32 v8, s7, v8, v15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s7
-; GFX10-NEXT:    v_mul_hi_u32 v16, v4, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, v13, v11
+; GFX10-NEXT:    v_mul_hi_u32 v16, v4, v2
 ; GFX10-NEXT:    v_add_co_u32 v1, s7, v14, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s7
 ; GFX10-NEXT:    v_add_co_u32 v0, s7, v0, v10
@@ -1990,14 +1990,14 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s7
 ; GFX10-NEXT:    v_add_co_u32 v9, s7, v1, v16
 ; GFX10-NEXT:    v_add3_u32 v7, v11, v10, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s7
 ; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v6, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v8, v12, v8
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s7
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v5, v7, vcc_lo
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v3, v2
-; GFX10-NEXT:    v_add_nc_u32_e32 v10, v13, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s7, s20, v6, 0
 ; GFX10-NEXT:    v_add_co_u32 v7, s7, v9, v8
+; GFX10-NEXT:    v_add_nc_u32_e32 v10, v13, v1
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s20, v6, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v9, s21, v6
 ; GFX10-NEXT:    v_mul_lo_u32 v11, s20, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s7
@@ -2009,7 +2009,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_mul_hi_u32 v0, v5, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v2, vcc_lo
 ; GFX10-NEXT:    v_mul_lo_u32 v12, v6, v7
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s7, s3, v4, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v4, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v9, s6, v4
 ; GFX10-NEXT:    v_mul_lo_u32 v11, s3, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v13, v5, v7
@@ -2074,7 +2074,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_mul_hi_u32 v8, s14, v4
 ; GFX10-NEXT:    v_add3_u32 v2, v6, v1, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s3, s8, v9, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v9, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v6, s9, v9
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s8, v5
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v3, v2, vcc_lo
@@ -2126,7 +2126,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_add3_u32 v2, v3, v1, v2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v6, v19, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v6, v7, v20, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s10, v4, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s10, v4, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s10, v2
 ; GFX10-NEXT:    v_mul_lo_u32 v11, s11, v4
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v17

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
index 91517e444a4e..1097014a5bb0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
@@ -403,7 +403,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s0, v2
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s2, s0, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v3, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s1, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v0
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v4, v5
@@ -429,7 +429,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_add3_u32 v1, v5, v4, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s1, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v1, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s2, s0, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v3, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s0, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v0
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v5, v4
@@ -476,7 +476,7 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 ; GFX10-NEXT:    v_mul_lo_u32 v4, s11, v2
 ; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v2, 1
 ; GFX10-NEXT:    v_add3_u32 v3, v3, v0, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s10, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s10, v2, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s10, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v3, vcc_lo
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v5, v4
@@ -1553,9 +1553,9 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v5, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v8, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s0, v4
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s6, s0, v5, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v5, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v9, s1, v5
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s6, s2, v8, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], null, s2, v8, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v11, s3, v8
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v7, v9
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v4, v0
@@ -1599,14 +1599,15 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s6
 ; GFX10-NEXT:    v_add3_u32 v1, v9, v7, v1
 ; GFX10-NEXT:    v_add_co_u32 v5, vcc_lo, v5, v0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_add3_u32 v3, v11, v10, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v4, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v8, vcc_lo, v8, v2
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, v6, v3, vcc_lo
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s6, s0, v5, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v5, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s1, v5
 ; GFX10-NEXT:    v_mul_lo_u32 v9, s0, v4
-; GFX10-NEXT:    v_mad_u64_u32 v[2:3], s0, s2, v8, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[2:3], null, s2, v8, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v10, s3, v8
 ; GFX10-NEXT:    v_mul_lo_u32 v11, s2, v6
 ; GFX10-NEXT:    v_mul_lo_u32 v12, v4, v0
@@ -1682,9 +1683,9 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_mul_hi_u32 v11, s10, v3
 ; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v7
 ; GFX10-NEXT:    v_add3_u32 v5, v5, v0, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s12, v4, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s12, v4, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v12, s13, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
 ; GFX10-NEXT:    v_mul_lo_u32 v13, s12, v5
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, v10, v2
 ; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s0
@@ -1695,14 +1696,13 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_add3_u32 v1, v1, v13, v12
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, v7, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v3, s11, v3
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT:    v_mov_b32_e32 v9, 0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v10, v8
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v8, s9, v1
 ; GFX10-NEXT:    v_sub_co_u32 v10, vcc_lo, s8, v0
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v11, s0, s9, v1, vcc_lo
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v0, vcc_lo, s13, v8, vcc_lo
 ; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s12, v10
-; GFX10-NEXT:    v_mov_b32_e32 v9, 0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, -1, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v8, vcc_lo, v10, s12
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v12, s0, 0, v0, vcc_lo
@@ -1722,25 +1722,25 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_add_co_u32 v6, s0, v2, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    v_add_co_u32 v15, s0, v16, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v18, s0, 0, v17, s0
-; GFX10-NEXT:    v_add3_u32 v3, v7, v1, v3
-; GFX10-NEXT:    v_mad_u64_u32 v[1:2], s0, s14, v6, 0
 ; GFX10-NEXT:    v_mul_lo_u32 v19, s15, v6
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v14
+; GFX10-NEXT:    v_add3_u32 v3, v7, v1, v3
+; GFX10-NEXT:    v_mad_u64_u32 v[1:2], null, s14, v6, 0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v18, s0, 0, v17, s0
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s14, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v15, v16, v15, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v16, s0, v8, s12
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v20, s0, 0, v0, s0
-; GFX10-NEXT:    v_add3_u32 v2, v2, v7, v19
-; GFX10-NEXT:    v_sub_co_u32 v7, s0, s10, v1
 ; GFX10-NEXT:    v_cndmask_b32_e32 v17, v17, v18, vcc_lo
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v13
+; GFX10-NEXT:    v_add3_u32 v2, v2, v7, v19
+; GFX10-NEXT:    v_sub_co_u32 v7, s0, s10, v1
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v15, vcc_lo
 ; GFX10-NEXT:    v_sub_co_ci_u32_e64 v13, s1, s11, v2, s0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s11, v2
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, v14
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v4, v15, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s15, v13
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v17, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s15, v13
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0
 ; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s14, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v8, v16, s1

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
index 0508b97d8500..fa75ea0c96d8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
@@ -271,7 +271,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W64-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1]
+; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1]
 ; GFX10W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W64-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W64-NEXT:    s_endpgm
@@ -300,7 +300,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W32-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1]
+; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1]
 ; GFX10W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W32-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W32-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index a85e17d019c2..458c15a5b382 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -292,7 +292,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s6, -1
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1]
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1]
 ; GFX1064-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX1064-NEXT:    s_endpgm
 ;
@@ -329,7 +329,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 addrspace
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s6, -1
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1]
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1]
 ; GFX1032-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX1032-NEXT:    s_endpgm
 entry:
@@ -705,7 +705,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3]
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -742,7 +742,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out, i64 addrspac
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3]
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -922,8 +922,8 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1064-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s6, -1
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s2, v2, s[0:1]
-; GFX1064-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s3, v2, v[1:2]
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1]
+; GFX1064-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2]
 ; GFX1064-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; GFX1064-NEXT:    s_endpgm
 ;
@@ -965,8 +965,8 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1032-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s6, -1
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v2, s[0:1]
-; GFX1032-NEXT:    v_mad_u64_u32 v[1:2], s0, s3, v2, v[1:2]
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[0:1]
+; GFX1032-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2]
 ; GFX1032-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; GFX1032-NEXT:    s_endpgm
 entry:
@@ -2043,12 +2043,12 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
 ; GFX1064-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[0:1]
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    v_mad_u64_u32 v[3:4], s[0:1], s2, v2, 0
-; GFX1064-NEXT:    s_mov_b32 s7, 0x31016000
-; GFX1064-NEXT:    s_mov_b32 s6, -1
-; GFX1064-NEXT:    v_mad_u64_u32 v[4:5], s[0:1], s3, v2, v[4:5]
+; GFX1064-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX1064-NEXT:    s_mov_b32 s7, 0x31016000
+; GFX1064-NEXT:    s_mov_b32 s6, -1
+; GFX1064-NEXT:    v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5]
 ; GFX1064-NEXT:    v_sub_co_u32 v0, vcc, s0, v3
 ; GFX1064-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX1064-NEXT:    v_sub_co_ci_u32_e32 v1, vcc, s1, v1, vcc
@@ -2089,12 +2089,12 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 addrspace
 ; GFX1032-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    v_mad_u64_u32 v[3:4], s0, s2, v2, 0
+; GFX1032-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
+; GFX1032-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX1032-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s6, -1
-; GFX1032-NEXT:    v_mad_u64_u32 v[4:5], s0, s3, v2, v[4:5]
-; GFX1032-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX1032-NEXT:    v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5]
 ; GFX1032-NEXT:    v_sub_co_u32 v0, vcc_lo, s0, v3
 ; GFX1032-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX1032-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, s1, v1, vcc_lo

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 55b732244a1f..8fc3d04e4248 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -357,7 +357,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
 ; GFX1064-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX1064-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s6, v0, s[0:1]
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, s6, v0, s[0:1]
 ; GFX1064-NEXT:    s_mov_b32 s6, -1
 ; GFX1064-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX1064-NEXT:    s_endpgm
@@ -391,7 +391,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
 ; GFX1032-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s6, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s0, s2, v0, s[0:1]
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v0, s[0:1]
 ; GFX1032-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX1032-NEXT:    s_endpgm
 ;
@@ -423,7 +423,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
 ; GFX1164-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX1164-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT:    v_mad_u64_u32 v[1:2], s[0:1], s6, v0, s[0:1]
+; GFX1164-NEXT:    v_mad_u64_u32 v[1:2], null, s6, v0, s[0:1]
 ; GFX1164-NEXT:    s_mov_b32 s6, -1
 ; GFX1164-NEXT:    buffer_store_b32 v1, off, s[4:7], 0
 ; GFX1164-NEXT:    s_endpgm
@@ -456,7 +456,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, i32 %additive
 ; GFX1132-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s6, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    v_mad_u64_u32 v[1:2], s0, s0, v0, s[2:3]
+; GFX1132-NEXT:    v_mad_u64_u32 v[1:2], null, s0, v0, s[2:3]
 ; GFX1132-NEXT:    buffer_store_b32 v1, off, s[4:7], 0
 ; GFX1132-NEXT:    s_endpgm
 entry:
@@ -1152,7 +1152,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3]
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1183,7 +1183,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3]
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1214,7 +1214,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1164-NEXT:    s_or_b64 exec, exec, s[2:3]
 ; GFX1164-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1164-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1164-NEXT:    v_mad_u64_u32 v[0:1], s[2:3], v2, 5, s[2:3]
+; GFX1164-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1244,7 +1244,7 @@ define amdgpu_kernel void @add_i64_constant(i64 addrspace(1)* %out) {
 ; GFX1132-NEXT:    s_or_b32 exec_lo, exec_lo, s2
 ; GFX1132-NEXT:    v_readfirstlane_b32 s2, v0
 ; GFX1132-NEXT:    v_readfirstlane_b32 s3, v1
-; GFX1132-NEXT:    v_mad_u64_u32 v[0:1], s2, v2, 5, s[2:3]
+; GFX1132-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 5, s[2:3]
 ; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1411,10 +1411,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
 ; GFX1064-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5]
-; GFX1064-NEXT:    v_mad_u64_u32 v[1:2], s[2:3], s3, v2, v[1:2]
-; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1064-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
+; GFX1064-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2]
+; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX1064-NEXT:    s_endpgm
 ;
@@ -1448,10 +1448,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
 ; GFX1032-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5]
-; GFX1032-NEXT:    v_mad_u64_u32 v[1:2], s2, s3, v2, v[1:2]
-; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1032-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
+; GFX1032-NEXT:    v_mad_u64_u32 v[1:2], null, s3, v2, v[1:2]
+; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX1032-NEXT:    s_endpgm
 ;
@@ -1485,10 +1485,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
 ; GFX1164-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX1164-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], s2, v2, s[4:5]
-; GFX1164-NEXT:    v_mad_u64_u32 v[3:4], s[2:3], s3, v2, v[1:2]
-; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1164-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
+; GFX1164-NEXT:    v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2]
+; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX1164-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
 ; GFX1164-NEXT:    s_endpgm
@@ -1522,10 +1522,10 @@ define amdgpu_kernel void @add_i64_uniform(i64 addrspace(1)* %out, i64 %additive
 ; GFX1132-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX1132-NEXT:    v_readfirstlane_b32 s5, v1
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    v_mad_u64_u32 v[0:1], s2, s2, v2, s[4:5]
-; GFX1132-NEXT:    v_mad_u64_u32 v[3:4], s2, s3, v2, v[1:2]
-; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1132-NEXT:    v_mad_u64_u32 v[0:1], null, s2, v2, s[4:5]
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
+; GFX1132-NEXT:    v_mad_u64_u32 v[3:4], null, s3, v2, v[1:2]
+; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX1132-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
 ; GFX1132-NEXT:    s_endpgm
@@ -3034,14 +3034,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
 ; GFX1064-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1064-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX1064-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0
-; GFX1064-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX1064-NEXT:    v_mad_u64_u32 v[4:5], s[2:3], s3, v2, v[4:5]
+; GFX1064-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1064-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1064-NEXT:    v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5]
 ; GFX1064-NEXT:    v_sub_co_u32 v0, vcc, s2, v3
-; GFX1064-NEXT:    v_mov_b32_e32 v1, v4
+; GFX1064-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1064-NEXT:    s_mov_b32 s2, -1
+; GFX1064-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX1064-NEXT:    v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc
 ; GFX1064-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX1064-NEXT:    s_endpgm
@@ -3074,14 +3074,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
 ; GFX1032-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX1032-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1032-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT:    v_mad_u64_u32 v[3:4], s2, s2, v2, 0
-; GFX1032-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX1032-NEXT:    v_mad_u64_u32 v[4:5], s2, s3, v2, v[4:5]
+; GFX1032-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1032-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1032-NEXT:    v_mad_u64_u32 v[4:5], null, s3, v2, v[4:5]
 ; GFX1032-NEXT:    v_sub_co_u32 v0, vcc_lo, s2, v3
-; GFX1032-NEXT:    v_mov_b32_e32 v1, v4
+; GFX1032-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1032-NEXT:    s_mov_b32 s2, -1
+; GFX1032-NEXT:    v_mov_b32_e32 v1, v4
 ; GFX1032-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo
 ; GFX1032-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX1032-NEXT:    s_endpgm
@@ -3114,14 +3114,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
 ; GFX1164-NEXT:  .LBB12_2:
 ; GFX1164-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX1164-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], s2, v2, 0
-; GFX1164-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX1164-NEXT:    v_mad_u64_u32 v[5:6], s[2:3], s3, v2, v[4:5]
+; GFX1164-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1164-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1164-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1164-NEXT:    v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
 ; GFX1164-NEXT:    v_sub_co_u32 v0, vcc, s2, v3
-; GFX1164-NEXT:    v_mov_b32_e32 v1, v5
+; GFX1164-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1164-NEXT:    s_mov_b32 s2, -1
+; GFX1164-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX1164-NEXT:    v_sub_co_ci_u32_e32 v1, vcc, s4, v1, vcc
 ; GFX1164-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
 ; GFX1164-NEXT:    s_endpgm
@@ -3153,14 +3153,14 @@ define amdgpu_kernel void @sub_i64_uniform(i64 addrspace(1)* %out, i64 %subitive
 ; GFX1132-NEXT:  .LBB12_2:
 ; GFX1132-NEXT:    s_or_b32 exec_lo, exec_lo, s4
 ; GFX1132-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132-NEXT:    v_mad_u64_u32 v[3:4], s2, s2, v2, 0
-; GFX1132-NEXT:    v_readfirstlane_b32 s4, v1
-; GFX1132-NEXT:    v_mad_u64_u32 v[5:6], s2, s3, v2, v[4:5]
+; GFX1132-NEXT:    v_mad_u64_u32 v[3:4], null, s2, v2, 0
 ; GFX1132-NEXT:    v_readfirstlane_b32 s2, v0
-; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
+; GFX1132-NEXT:    v_readfirstlane_b32 s4, v1
+; GFX1132-NEXT:    v_mad_u64_u32 v[5:6], null, s3, v2, v[4:5]
 ; GFX1132-NEXT:    v_sub_co_u32 v0, vcc_lo, s2, v3
-; GFX1132-NEXT:    v_mov_b32_e32 v1, v5
+; GFX1132-NEXT:    s_mov_b32 s3, 0x31016000
 ; GFX1132-NEXT:    s_mov_b32 s2, -1
+; GFX1132-NEXT:    v_mov_b32_e32 v1, v5
 ; GFX1132-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, s4, v1, vcc_lo
 ; GFX1132-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
 ; GFX1132-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
index 007f7e6ef7c7..a33e7dc1550d 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
@@ -270,7 +270,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W64-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1]
+; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1]
 ; GFX10W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W64-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W64-NEXT:    s_endpgm
@@ -299,7 +299,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W32-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1]
+; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1]
 ; GFX10W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W32-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W32-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
index 9cfd9df76444..6cd1d8b8e791 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
@@ -279,7 +279,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W64-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W64-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s8, v0, s[0:1]
+; GFX10W64-NEXT:    v_mad_u64_u32 v[0:1], null, s8, v0, s[0:1]
 ; GFX10W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W64-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W64-NEXT:    s_endpgm
@@ -309,7 +309,7 @@ define amdgpu_kernel void @add_i32_uniform(i32 addrspace(1)* %out, <4 x i32> %in
 ; GFX10W32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10W32-NEXT:    v_readfirstlane_b32 s0, v1
 ; GFX10W32-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], s0, s4, v0, s[0:1]
+; GFX10W32-NEXT:    v_mad_u64_u32 v[0:1], null, s4, v0, s[0:1]
 ; GFX10W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10W32-NEXT:    global_store_dword v1, v0, s[2:3]
 ; GFX10W32-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index 578faa4a6ea0..860fa9ab31bf 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -52,7 +52,7 @@ entry:
 ; GFX9: v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
 ;
 ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
-; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
 define amdgpu_kernel void @vadd64rr(i64 addrspace(1)* %out, i64 %a) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -82,7 +82,7 @@ entry:
 ; GFX9: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ;
 ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
-; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0, 0x1234, [[CARRY]]
+; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, 0, 0x1234, [[CARRY]]
 define amdgpu_kernel void @vadd64ri(i64 addrspace(1)* %out) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -171,7 +171,7 @@ define amdgpu_kernel void @suaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca
 ; GFX9:	v_addc_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
 ;
 ; GFX1010: v_add_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
-; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+; GFX1010: v_add_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
 define amdgpu_kernel void @vuaddo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -237,7 +237,7 @@ entry:
 ; GFX9: v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
 ;
 ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}
-; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
 define amdgpu_kernel void @vsub64rr(i64 addrspace(1)* %out, i64 %a) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -267,7 +267,7 @@ entry:
 ; GFX9: v_subbrev_co_u32_e32 v1, vcc, 0, v1, vcc
 ;
 ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], 0x56789876, v{{[0-9]+}}
-; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], 0x1234, 0, [[CARRY]]
+; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, 0x1234, 0, [[CARRY]]
 define amdgpu_kernel void @vsub64ri(i64 addrspace(1)* %out) {
 entry:
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -356,7 +356,7 @@ define amdgpu_kernel void @susubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %ca
 ; GFX9:	v_subbrev_co_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
 ;
 ; GFX1010: v_sub_co_u32 v{{[0-9]+}}, [[CARRY:s[0-9]+]], s{{[0-9]+}}, v0
-; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, [[CARRY]], s{{[0-9]+}}, 0, [[CARRY]]
+; GFX1010: v_sub_co_ci_u32_e64 v{{[0-9]+}}, null, s{{[0-9]+}}, 0, [[CARRY]]
 define amdgpu_kernel void @vusubo64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 %a) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64

diff  --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index c10143c928b2..cee3afc3e5bf 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -561,7 +561,7 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)
 ; GFX10-NEXT:    global_load_dword v1, v0, s[6:7]
 ; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:16
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v4, s0, v2, v2, v1
+; GFX10-NEXT:    v_div_scale_f32 v4, null, v2, v2, v1
 ; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v1, v2, v1
 ; GFX10-NEXT:    v_rcp_f32_e32 v5, v4
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -976,7 +976,7 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v12, s[6:7]
 ; GFX10-NEXT:    global_load_dwordx2 v[2:3], v12, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[4:5], s0, v[2:3], v[2:3], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[4:5], null, v[2:3], v[2:3], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
 ; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
 ; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
@@ -2140,7 +2140,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; GFX10-NEXT:    global_load_dwordx2 v[0:1], v4, s[6:7]
 ; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:32
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v6, s0, v3, v3, v1
+; GFX10-NEXT:    v_div_scale_f32 v6, null, v3, v3, v1
 ; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v1, v3, v1
 ; GFX10-NEXT:    v_rcp_f32_e32 v7, v6
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2155,7 +2155,7 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float
 ; GFX10-NEXT:    v_div_fixup_f32 v5, v5, v3, v1
 ; GFX10-NEXT:    v_trunc_f32_e32 v5, v5
 ; GFX10-NEXT:    v_fma_f32 v1, -v5, v3, v1
-; GFX10-NEXT:    v_div_scale_f32 v5, s0, v2, v2, v0
+; GFX10-NEXT:    v_div_scale_f32 v5, null, v2, v2, v0
 ; GFX10-NEXT:    v_div_scale_f32 v3, vcc_lo, v0, v2, v0
 ; GFX10-NEXT:    v_rcp_f32_e32 v6, v5
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2506,7 +2506,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GFX10-NEXT:    global_load_dwordx4 v[0:3], v8, s[6:7]
 ; GFX10-NEXT:    global_load_dwordx4 v[4:7], v8, s[2:3] offset:64
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f32 v10, s0, v7, v7, v3
+; GFX10-NEXT:    v_div_scale_f32 v10, null, v7, v7, v3
 ; GFX10-NEXT:    v_div_scale_f32 v9, vcc_lo, v3, v7, v3
 ; GFX10-NEXT:    v_rcp_f32_e32 v11, v10
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2521,7 +2521,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GFX10-NEXT:    v_div_fixup_f32 v9, v9, v7, v3
 ; GFX10-NEXT:    v_trunc_f32_e32 v9, v9
 ; GFX10-NEXT:    v_fma_f32 v3, -v9, v7, v3
-; GFX10-NEXT:    v_div_scale_f32 v9, s0, v6, v6, v2
+; GFX10-NEXT:    v_div_scale_f32 v9, null, v6, v6, v2
 ; GFX10-NEXT:    v_div_scale_f32 v7, vcc_lo, v2, v6, v2
 ; GFX10-NEXT:    v_rcp_f32_e32 v10, v9
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2536,7 +2536,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GFX10-NEXT:    v_div_fixup_f32 v7, v7, v6, v2
 ; GFX10-NEXT:    v_trunc_f32_e32 v7, v7
 ; GFX10-NEXT:    v_fma_f32 v2, -v7, v6, v2
-; GFX10-NEXT:    v_div_scale_f32 v7, s0, v5, v5, v1
+; GFX10-NEXT:    v_div_scale_f32 v7, null, v5, v5, v1
 ; GFX10-NEXT:    v_div_scale_f32 v6, vcc_lo, v1, v5, v1
 ; GFX10-NEXT:    v_rcp_f32_e32 v9, v7
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2551,7 +2551,7 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float
 ; GFX10-NEXT:    v_div_fixup_f32 v6, v6, v5, v1
 ; GFX10-NEXT:    v_trunc_f32_e32 v6, v6
 ; GFX10-NEXT:    v_fma_f32 v1, -v6, v5, v1
-; GFX10-NEXT:    v_div_scale_f32 v6, s0, v4, v4, v0
+; GFX10-NEXT:    v_div_scale_f32 v6, null, v4, v4, v0
 ; GFX10-NEXT:    v_div_scale_f32 v5, vcc_lo, v0, v4, v0
 ; GFX10-NEXT:    v_rcp_f32_e32 v7, v6
 ; GFX10-NEXT:    s_denorm_mode 15
@@ -2807,7 +2807,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; GFX10-NEXT:    global_load_dwordx4 v[0:3], v16, s[6:7]
 ; GFX10-NEXT:    global_load_dwordx4 v[4:7], v16, s[2:3] offset:64
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_div_scale_f64 v[8:9], s0, v[6:7], v[6:7], v[2:3]
+; GFX10-NEXT:    v_div_scale_f64 v[8:9], null, v[6:7], v[6:7], v[2:3]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
 ; GFX10-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
 ; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
@@ -2820,7 +2820,7 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub
 ; GFX10-NEXT:    v_div_fixup_f64 v[8:9], v[8:9], v[6:7], v[2:3]
 ; GFX10-NEXT:    v_trunc_f64_e32 v[8:9], v[8:9]
 ; GFX10-NEXT:    v_fma_f64 v[2:3], -v[8:9], v[6:7], v[2:3]
-; GFX10-NEXT:    v_div_scale_f64 v[6:7], s0, v[4:5], v[4:5], v[0:1]
+; GFX10-NEXT:    v_div_scale_f64 v[6:7], null, v[4:5], v[4:5], v[0:1]
 ; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[6:7]
 ; GFX10-NEXT:    v_fma_f64 v[10:11], -v[6:7], v[8:9], 1.0
 ; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[10:11], v[8:9]

diff  --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
index e0c7b72ebe4a..b02c46c560ed 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.ll
@@ -121,7 +121,7 @@ define amdgpu_ps float @global_xchg_saddr_i32_rtn_2048(i8 addrspace(1)* inreg %s
 ; GFX10-LABEL: global_xchg_saddr_i32_rtn_2048:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc, 0x800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc, 0, v3, vcc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)

diff  --git a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
index 9f3edae99aa1..897dec0235a5 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
@@ -86,7 +86,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4096(i8 addrspace(1)* inr
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4096:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0xfffff000, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -111,7 +111,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4097(i8 addrspace(1)* inr
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4097:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0xfffff000, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -136,7 +136,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4098(i8 addrspace(1)* inr
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4098:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0xfffff000, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -240,7 +240,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2049(i8 addrspace(1)* inr
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg2049:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0xfffff800, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -263,7 +263,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg2050(i8 addrspace(1)* inr
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg2050:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0xfffff800, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -308,7 +308,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294967296(i8 addrspace(1)*
 ; GFX10-LABEL: global_load_saddr_i8_offset_4294967296:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -332,7 +332,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294967297(i8 addrspace(1)*
 ; GFX10-LABEL: global_load_saddr_i8_offset_4294967297:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -356,7 +356,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294971391(i8 addrspace(1)*
 ; GFX10-LABEL: global_load_saddr_i8_offset_4294971391:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0x800, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -380,7 +380,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_4294971392(i8 addrspace(1)*
 ; GFX10-LABEL: global_load_saddr_i8_offset_4294971392:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0x1000, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], 1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -405,7 +405,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967295(i8 addrspace(1
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967295:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0x800, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-2047
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -429,7 +429,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967296(i8 addrspace(1
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967296:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -453,7 +453,7 @@ define amdgpu_ps float @global_load_saddr_i8_offset_neg4294967297(i8 addrspace(1
 ; GFX10-LABEL: global_load_saddr_i8_offset_neg4294967297:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], 0, s2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], -1, s3, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s3, s[0:1]
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -494,7 +494,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4095(i8 addrspace(
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0x800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047
@@ -525,7 +525,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4096(i8 addrspace(
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4096:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0x1000, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
@@ -551,7 +551,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg4096(i8 addrspa
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4096:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0xfffff000, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
@@ -582,7 +582,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg4097(i8 addrspa
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg4097:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0xfffff000, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
@@ -624,7 +624,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_2048(i8 addrspace(
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_2048:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0x800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off
@@ -666,7 +666,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_neg2049(i8 addrspa
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_neg2049:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0xfffff800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, -1, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:-1
@@ -692,7 +692,7 @@ define amdgpu_ps float @global_load_saddr_i8_zext_vgpr_offset_4095_gep_order(i8
 ; GFX10-LABEL: global_load_saddr_i8_zext_vgpr_offset_4095_gep_order:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    v_add_co_u32 v0, s[0:1], s2, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s[0:1], s3, 0, s[0:1]
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc, 0x800, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
index 24b236b5dd34..e64e437342de 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -131,9 +131,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr
 ; GFX1013-NEXT:    v_mov_b32_e32 v10, 0x41000000
 ; GFX1013-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1013-NEXT:    v_add_co_u32 v2, s4, s4, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v3, s4, s5, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v3, null, s5, 0, s4
 ; GFX1013-NEXT:    v_add_co_u32 v4, s4, s6, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v5, s4, s7, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v5, null, s7, 0, s4
 ; GFX1013-NEXT:    flat_load_dword v0, v[2:3]
 ; GFX1013-NEXT:    flat_load_dword v1, v[4:5]
 ; GFX1013-NEXT:    v_mov_b32_e32 v2, 0
@@ -160,9 +160,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr
 ; GFX1030-NEXT:    v_mov_b32_e32 v4, 2.0
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s4, s4, v2
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1030-NEXT:    v_add_co_u32 v2, s4, s6, v2
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v3, s4, s7, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v3, null, s7, 0, s4
 ; GFX1030-NEXT:    flat_load_dword v0, v[0:1]
 ; GFX1030-NEXT:    flat_load_dword v1, v[2:3]
 ; GFX1030-NEXT:    v_mov_b32_e32 v2, 0
@@ -202,9 +202,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node
 ; GFX1013-NEXT:    v_mov_b32_e32 v7, 0x48004700
 ; GFX1013-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1013-NEXT:    v_add_co_u32 v2, s4, s4, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v3, s4, s5, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v3, null, s5, 0, s4
 ; GFX1013-NEXT:    v_add_co_u32 v4, s4, s6, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v5, s4, s7, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v5, null, s7, 0, s4
 ; GFX1013-NEXT:    flat_load_dword v0, v[2:3]
 ; GFX1013-NEXT:    flat_load_dword v1, v[4:5]
 ; GFX1013-NEXT:    v_mov_b32_e32 v2, 0
@@ -228,9 +228,9 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node
 ; GFX1030-NEXT:    v_mov_b32_e32 v7, 0x48004700
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s4, s4, v2
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1030-NEXT:    v_add_co_u32 v2, s4, s6, v2
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v3, s4, s7, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v3, null, s7, 0, s4
 ; GFX1030-NEXT:    flat_load_dword v0, v[0:1]
 ; GFX1030-NEXT:    flat_load_dword v1, v[2:3]
 ; GFX1030-NEXT:    v_mov_b32_e32 v2, 0
@@ -277,7 +277,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray,
 ; GFX1013-NEXT:    v_mov_b32_e32 v11, 0x41000000
 ; GFX1013-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1013-NEXT:    v_add_co_u32 v0, s4, s4, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1013-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX1013-NEXT:    v_mov_b32_e32 v0, 0xb36211c7
 ; GFX1013-NEXT:    v_mov_b32_e32 v1, 0x102
@@ -303,7 +303,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray,
 ; GFX1030-NEXT:    v_mov_b32_e32 v4, 1.0
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s4, s4, v0
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1030-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, 0x102
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, 0xb36211c7
@@ -344,7 +344,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_
 ; GFX1013-NEXT:    v_mov_b32_e32 v8, 0x48004700
 ; GFX1013-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1013-NEXT:    v_add_co_u32 v0, s4, s4, v0
-; GFX1013-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1013-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1013-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX1013-NEXT:    v_mov_b32_e32 v0, 0xb36211c6
 ; GFX1013-NEXT:    v_mov_b32_e32 v1, 0x102
@@ -367,7 +367,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_
 ; GFX1030-NEXT:    v_mov_b32_e32 v8, 0x48004700
 ; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1030-NEXT:    v_add_co_u32 v0, s4, s4, v0
-; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, s4, s5, 0, s4
+; GFX1030-NEXT:    v_add_co_ci_u32_e64 v1, null, s5, 0, s4
 ; GFX1030-NEXT:    flat_load_dword v2, v[0:1]
 ; GFX1030-NEXT:    v_mov_b32_e32 v1, 0x102
 ; GFX1030-NEXT:    v_mov_b32_e32 v0, 0xb36211c6

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index 4d43abfceedf..e0a2705b7fef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -58,10 +58,10 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v5, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v4, v2, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[6:7], s4, v4, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[9:10], s4, v5, v2, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[11:12], s4, v5, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[6:7], null, v4, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[9:10], null, v5, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[11:12], null, v5, v3, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v5, v2
 ; GFX10-NEXT:    v_mul_lo_u32 v4, v4, v3
@@ -83,10 +83,10 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v4, v2, 0
-; GFX11-NEXT:    v_mad_u64_u32 v[6:7], s0, v4, v3, 0
-; GFX11-NEXT:    v_mad_u64_u32 v[9:10], s0, v5, v2, 0
-; GFX11-NEXT:    v_mad_u64_u32 v[11:12], s0, v5, v3, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[6:7], null, v4, v3, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[9:10], null, v5, v2, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[11:12], null, v5, v3, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX11-NEXT:    v_mul_lo_u32 v5, v5, v2
 ; GFX11-NEXT:    v_mul_lo_u32 v4, v4, v3
@@ -186,10 +186,10 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v5, v1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s4, v4, v2, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[6:7], s4, v4, v3, 0
-; GFX10-NEXT:    v_mad_u64_u32 v[9:10], s4, v5, v2, 0
-; GFX10-NEXT:    v_mad_i64_i32 v[11:12], s4, v5, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[6:7], null, v4, v3, 0
+; GFX10-NEXT:    v_mad_u64_u32 v[9:10], null, v5, v2, 0
+; GFX10-NEXT:    v_mad_i64_i32 v[11:12], null, v5, v3, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX10-NEXT:    v_add_co_u32 v6, vcc_lo, v8, v6
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo
@@ -223,10 +223,10 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v4, v2, 0
-; GFX11-NEXT:    v_mad_u64_u32 v[6:7], s0, v4, v3, 0
-; GFX11-NEXT:    v_mad_u64_u32 v[9:10], s0, v5, v2, 0
-; GFX11-NEXT:    v_mad_i64_i32 v[11:12], s0, v5, v3, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v2, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[6:7], null, v4, v3, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[9:10], null, v5, v2, 0
+; GFX11-NEXT:    v_mad_i64_i32 v[11:12], null, v5, v3, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v8, v1
 ; GFX11-NEXT:    v_add_co_u32 v6, vcc_lo, v8, v6
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v7, vcc_lo, 0, v7, vcc_lo

diff  --git a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
index 2c1f77e47877..55a0d65fbaff 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_64_32.ll
@@ -34,7 +34,7 @@ define i64 @mad_i64_i32_sextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = sext i32 %arg0 to i64
   %sext1 = sext i32 %arg1 to i64
@@ -71,7 +71,7 @@ define i64 @mad_i64_i32_sextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = sext i32 %arg0 to i64
   %sext1 = sext i32 %arg1 to i64
@@ -108,7 +108,7 @@ define i64 @mad_u64_u32_zextops(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = zext i32 %arg0 to i64
   %sext1 = zext i32 %arg1 to i64
@@ -145,7 +145,7 @@ define i64 @mad_u64_u32_zextops_commute(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = zext i32 %arg0 to i64
   %sext1 = zext i32 %arg1 to i64
@@ -244,20 +244,20 @@ define i128 @mad_i64_i32_sextops_i32_i128(i32 %arg0, i32 %arg1, i128 %arg2) #0 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    v_mad_u64_u32 v[6:7], s0, v0, v1, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[6:7], null, v0, v1, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v14, 31, v0
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v15, 31, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[9:10], s0, v14, v1, v[7:8]
+; GFX11-NEXT:    v_mad_u64_u32 v[9:10], null, v14, v1, v[7:8]
 ; GFX11-NEXT:    v_mov_b32_e32 v7, v10
 ; GFX11-NEXT:    v_mov_b32_e32 v10, v8
-; GFX11-NEXT:    v_mad_u64_u32 v[11:12], s0, v0, v15, v[9:10]
-; GFX11-NEXT:    v_mad_i64_i32 v[9:10], s0, v1, v14, 0
+; GFX11-NEXT:    v_mad_u64_u32 v[11:12], null, v0, v15, v[9:10]
+; GFX11-NEXT:    v_mad_i64_i32 v[9:10], null, v1, v14, 0
 ; GFX11-NEXT:    v_mov_b32_e32 v8, v12
+; GFX11-NEXT:    v_mad_i64_i32 v[12:13], null, v15, v0, v[9:10]
 ; GFX11-NEXT:    v_add_co_u32 v7, s0, v7, v8
-; GFX11-NEXT:    v_add_co_ci_u32_e64 v8, s0, 0, 0, s0
-; GFX11-NEXT:    v_mad_i64_i32 v[12:13], s0, v15, v0, v[9:10]
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v14, v15, v[7:8]
+; GFX11-NEXT:    v_add_co_ci_u32_e64 v8, null, 0, 0, s0
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v14, v15, v[7:8]
 ; GFX11-NEXT:    v_mov_b32_e32 v7, v11
 ; GFX11-NEXT:    v_add_co_u32 v8, vcc_lo, v0, v12
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v9, vcc_lo, v1, v13, vcc_lo
@@ -301,7 +301,7 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = sext i32 %arg0 to i63
   %sext1 = sext i32 %arg1 to i63
@@ -346,7 +346,7 @@ define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_bfe_i32 v4, v1, 0, 31
 ; GFX11-NEXT:    v_bfe_i32 v5, v0, 0, 31
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %sext0 = sext i31 %arg0 to i63
   %sext1 = sext i31 %arg1 to i63
@@ -394,10 +394,10 @@ define i64 @mad_i64_i32_extops_i32_i64(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v5, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v5, v4, v[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
 ; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v5
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[1:2], s0, v5, v4, v[3:4]
+; GFX11-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v4, v[3:4]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %ext0 = sext i32 %arg0 to i64
   %ext1 = zext i32 %arg1 to i64
@@ -433,7 +433,7 @@ define i64 @mad_u64_u32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v3, v2, v[4:5]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, v[4:5]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %trunc.lhs = and i64 %arg0, 4294967295
   %trunc.rhs = and i64 %arg1, 4294967295
@@ -481,10 +481,10 @@ define i64 @mad_u64_u32_bitops_lhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v2
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX11-NEXT:    v_mov_b32_e32 v6, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v2, v3, v[4:5]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, v3, v[4:5]
 ; GFX11-NEXT:    v_and_b32_e32 v5, 1, v6
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[1:2], s0, v5, v3, v[4:5]
+; GFX11-NEXT:    v_mad_u64_u32 v[1:2], null, v5, v3, v[4:5]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %trunc.lhs = and i64 %arg0, 8589934591
   %trunc.rhs = and i64 %arg1, 4294967295
@@ -532,10 +532,10 @@ define i64 @mad_u64_u32_bitops_rhs_mask_small(i64 %arg0, i64 %arg1, i64 %arg2) #
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v6, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v6, v2, v[4:5]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v6, v2, v[4:5]
 ; GFX11-NEXT:    v_and_b32_e32 v4, 1, v3
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v1
-; GFX11-NEXT:    v_mad_u64_u32 v[1:2], s0, v6, v4, v[3:4]
+; GFX11-NEXT:    v_mad_u64_u32 v[1:2], null, v6, v4, v[3:4]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %trunc.lhs = and i64 %arg0, 4294967295
   %trunc.rhs = and i64 %arg1, 8589934591
@@ -571,7 +571,7 @@ define i64 @mad_i64_i32_bitops(i64 %arg0, i64 %arg1, i64 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v0
-; GFX11-NEXT:    v_mad_i64_i32 v[0:1], s0, v3, v2, v[4:5]
+; GFX11-NEXT:    v_mad_i64_i32 v[0:1], null, v3, v2, v[4:5]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %shl.lhs = shl i64 %arg0, 32
   %trunc.lhs = ashr i64 %shl.lhs, 32
@@ -609,7 +609,7 @@ define i64 @mad_i64_i32_unpack_i64ops(i64 %arg0) #0 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    v_mad_u64_u32 v[2:3], s0, v1, v0, v[0:1]
+; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, v1, v0, v[0:1]
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX11-NEXT:    v_mov_b32_e32 v1, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -729,8 +729,8 @@ define i64 @mad_i64_i32_twice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3) #0 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    v_mad_i64_i32 v[6:7], s0, v0, v1, v[2:3]
-; GFX11-NEXT:    v_mad_i64_i32 v[2:3], s0, v0, v1, v[4:5]
+; GFX11-NEXT:    v_mad_i64_i32 v[6:7], null, v0, v1, v[2:3]
+; GFX11-NEXT:    v_mad_i64_i32 v[2:3], null, v0, v1, v[4:5]
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v6, v2
 ; GFX11-NEXT:    v_xor_b32_e32 v1, v7, v3
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
@@ -793,7 +793,7 @@ define i64 @mad_i64_i32_thrice(i32 %arg0, i32 %arg1, i64 %arg2, i64 %arg3, i64 %
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    v_mad_i64_i32 v[8:9], s0, v0, v1, 0
+; GFX11-NEXT:    v_mad_i64_i32 v[8:9], null, v0, v1, 0
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v8, v2
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v9, v3, vcc_lo
 ; GFX11-NEXT:    v_add_co_u32 v2, vcc_lo, v8, v4
@@ -851,7 +851,7 @@ define i64 @mad_i64_i32_secondary_use(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
-; GFX11-NEXT:    v_mad_i64_i32 v[4:5], s0, v0, v1, 0
+; GFX11-NEXT:    v_mad_i64_i32 v[4:5], null, v0, v1, 0
 ; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
 ; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v5, v3, vcc_lo
 ; GFX11-NEXT:    v_xor_b32_e32 v0, v0, v4
@@ -908,7 +908,7 @@ define i48 @mad_i48_i48(i48 %arg0, i48 %arg1, i48 %arg2) #0 {
 ; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX11-NEXT:    v_mov_b32_e32 v6, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v7, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v7, v2, v[4:5]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v7, v2, v[4:5]
 ; GFX11-NEXT:    v_mul_lo_u32 v3, v7, v3
 ; GFX11-NEXT:    v_mul_lo_u32 v2, v6, v2
 ; GFX11-NEXT:    v_add3_u32 v1, v2, v1, v3

diff  --git a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
index 2a06faaa5878..279a79427345 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1100 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
 
 define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
 ; GFX9-LABEL: mad_i32_vvv:
@@ -11,14 +13,14 @@ define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
 ;
 ; GFX10-LABEL: mad_i32_vvv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, v1, v[2:3]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vvv:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v4, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v4, v3, v[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3]
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, %c
@@ -47,14 +49,14 @@ define amdgpu_ps float @mad_i32_vvc(i32 %a, i32 %b) {
 ;
 ; GFX10-LABEL: mad_i32_vvc:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, v1, 42
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, 42
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vvc:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v3, v2, 42
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, 42
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, 42
@@ -72,14 +74,14 @@ define amdgpu_ps float @mad_i32_vvi(i32 %a, i32 %b) {
 ;
 ; GFX10-LABEL: mad_i32_vvi:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, v1, 0x12d687
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vvi:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v3, v2, 0x12d687
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, 1234567
@@ -95,12 +97,12 @@ define amdgpu_ps float @mad_i32_vcv(i32 %a, i32 %c) {
 ;
 ; GFX10-LABEL: mad_i32_vcv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, 42, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, 42, v[1:2]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vcv:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mad_u64_u32 v[2:3], s0, v0, 42, v[1:2]
+; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2]
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, 42
@@ -117,13 +119,13 @@ define amdgpu_ps float @mad_i32_vcc(i32 %a) {
 ;
 ; GFX10-LABEL: mad_i32_vcc:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, 42, 43
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, 42, 43
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vcc:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v2, 42, 43
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 42, 43
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, 42
   %add = add i32 %mul, 43
@@ -139,14 +141,14 @@ define amdgpu_ps float @mad_i32_vvs(i32 %a, i32 %b, i32 inreg %c) {
 ;
 ; GFX10-LABEL: mad_i32_vvs:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, v1, s[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vvs:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX11-NEXT:    v_mov_b32_e32 v3, v0
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v3, v2, s[0:1]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1]
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, %c
@@ -162,12 +164,12 @@ define amdgpu_ps float @mad_i32_vsv(i32 %a, i32 inreg %b, i32 %c) {
 ;
 ; GFX10-LABEL: mad_i32_vsv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vsv:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mad_u64_u32 v[2:3], s0, v0, s0, v[1:2]
+; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2]
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
@@ -184,12 +186,12 @@ define amdgpu_ps float @mad_i32_svv(i32 inreg %a, i32 %b, i32 %c) {
 ;
 ; GFX10-LABEL: mad_i32_svv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s0, v0, v[1:2]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_svv:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mad_u64_u32 v[2:3], s0, s0, v0, v[1:2]
+; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2]
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v2
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
@@ -208,14 +210,14 @@ define amdgpu_ps float @mad_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c) {
 ; GFX10-LABEL: mad_i32_vss:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_mov_b32 s2, s1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, s[2:3]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_vss:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX11-NEXT:    s_mov_b32 s2, s1
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, v2, s0, s[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3]
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, %c
@@ -233,14 +235,14 @@ define amdgpu_ps float @mad_i32_svs(i32 inreg %a, i32 %b, i32 inreg %c) {
 ; GFX10-LABEL: mad_i32_svs:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_mov_b32 s2, s1
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s0, v0, s[2:3]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_svs:
 ; GFX11:       ; %bb.0:
 ; GFX11-NEXT:    v_mov_b32_e32 v2, v0
 ; GFX11-NEXT:    s_mov_b32 s2, s1
-; GFX11-NEXT:    v_mad_u64_u32 v[0:1], s0, s0, v2, s[2:3]
+; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3]
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b
   %add = add i32 %mul, %c
@@ -257,12 +259,12 @@ define amdgpu_ps float @mad_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
 ;
 ; GFX10-LABEL: mad_i32_ssv:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_mad_u64_u32 v[0:1], s0, s0, s1, v[0:1]
+; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1]
 ; GFX10-NEXT:    ; return to shader part epilog
 ;
 ; GFX11-LABEL: mad_i32_ssv:
 ; GFX11:       ; %bb.0:
-; GFX11-NEXT:    v_mad_u64_u32 v[1:2], s0, s0, s1, v[0:1]
+; GFX11-NEXT:    v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1]
 ; GFX11-NEXT:    v_mov_b32_e32 v0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
   %mul = mul i32 %a, %b

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index 06e36ec05cd6..71d9018c1d7b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -169,7 +169,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-WGP-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1] slc
 ; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s3
@@ -183,7 +183,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-CU-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1] slc
 ; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s3
@@ -270,8 +270,8 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-WGP-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
-; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] slc dlc
+; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] slc
 ; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -284,8 +284,8 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-CU-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
-; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] slc dlc
+; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] slc
 ; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -462,7 +462,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX10-WGP-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX10-WGP-NEXT:    flat_load_dword v2, v[1:2]
-; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2 glc slc
 ; GFX10-WGP-NEXT:    s_endpgm
@@ -476,7 +476,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX10-CU-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX10-CU-NEXT:    flat_load_dword v2, v[1:2]
-; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2 glc slc
 ; GFX10-CU-NEXT:    s_endpgm
@@ -563,7 +563,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX11-WGP-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX11-WGP-NEXT:    flat_load_b32 v2, v[1:2]
-; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2 glc slc dlc
 ; GFX11-WGP-NEXT:    s_endpgm
@@ -577,7 +577,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-CU-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX11-CU-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX11-CU-NEXT:    flat_load_b32 v2, v[1:2]
-; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2 glc slc dlc
 ; GFX11-CU-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index a05a651128eb..9e530ed43453 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -120,7 +120,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-WGP-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-WGP-NEXT:    flat_load_dword v2, v[0:1] glc dlc
 ; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, s2
@@ -135,7 +135,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX10-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-CU-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-CU-NEXT:    flat_load_dword v2, v[0:1] glc dlc
 ; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-CU-NEXT:    v_mov_b32_e32 v0, s2
@@ -166,7 +166,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-WGP-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-WGP-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX11-WGP-NEXT:    flat_load_b32 v2, v[0:1] glc dlc
 ; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, s2
@@ -181,7 +181,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ; GFX11-CU-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX11-CU-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX11-CU-NEXT:    flat_load_b32 v2, v[0:1] glc dlc
 ; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-CU-NEXT:    v_mov_b32_e32 v0, s2
@@ -315,7 +315,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX10-WGP-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX10-WGP-NEXT:    flat_load_dword v2, v[1:2]
-; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-WGP-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -330,7 +330,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX10-CU-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX10-CU-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX10-CU-NEXT:    flat_load_dword v2, v[1:2]
-; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX10-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX10-CU-NEXT:    flat_store_dword v[0:1], v2
 ; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -361,7 +361,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX11-WGP-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX11-WGP-NEXT:    flat_load_b32 v2, v[1:2]
-; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-WGP-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-WGP-NEXT:    flat_store_b32 v[0:1], v2 dlc
 ; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
@@ -376,7 +376,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ; GFX11-CU-NEXT:    v_mov_b32_e32 v2, s1
 ; GFX11-CU-NEXT:    v_add_co_u32 v0, s0, s2, v0
 ; GFX11-CU-NEXT:    flat_load_b32 v2, v[1:2]
-; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, s0, s3, 0, s0
+; GFX11-CU-NEXT:    v_add_co_ci_u32_e64 v1, null, s3, 0, s0
 ; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX11-CU-NEXT:    flat_store_b32 v[0:1], v2 dlc
 ; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0

diff  --git a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
index 06e0984d05e4..91672e0f3b25 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-global.ll
@@ -749,7 +749,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -778,7 +778,7 @@ define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -883,7 +883,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xfffff000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -912,7 +912,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffe000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -941,7 +941,7 @@ define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0xffffc000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, -1, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -971,7 +971,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -1001,7 +1001,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -1031,7 +1031,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x800, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -1061,7 +1061,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x1000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -1091,7 +1091,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x1800, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off
@@ -1121,7 +1121,7 @@ define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspa
 ; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, 0x2000, s0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, 2, s1, s0
 ; GFX10-NEXT:    global_load_ubyte v0, v[0:1], off glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    global_store_byte v[0:1], v0, off

diff  --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index d958f5d9d97d..9c7067455471 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -186,7 +186,7 @@ define amdgpu_kernel void @clmem_read_simplified(i8 addrspace(1)*  %buffer) {
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 0x1000
@@ -622,7 +622,7 @@ define hidden amdgpu_kernel void @clmem_read(i8 addrspace(1)*  %buffer) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, s35, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, s34, v2
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x5000, v3
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo
 ; GFX10-NEXT:  .LBB1_1: ; %for.cond.preheader
 ; GFX10-NEXT:    ; =>This Loop Header: Depth=1
@@ -1087,7 +1087,7 @@ define amdgpu_kernel void @Address32(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, 0x800, v0
@@ -1367,7 +1367,7 @@ define amdgpu_kernel void @Offset64(i8 addrspace(1)*  %buffer) {
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 0xfffff800
@@ -1594,7 +1594,7 @@ define amdgpu_kernel void @p32Offset64(i8 addrspace(1)*  %buffer) {
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 2, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v0, 0x80000000
@@ -1838,9 +1838,9 @@ define amdgpu_kernel void @DiffBase(i8 addrspace(1)* %buffer1,
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 7, v0
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v0
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, s36, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, s37, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s37, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v14, s0, s38, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v15, s0, s39, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v15, null, s39, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, 0x1800
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v14, 0x3000
@@ -2090,7 +2090,7 @@ define amdgpu_kernel void @ReverseOrder(i8 addrspace(1)* %buffer) {
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, 0x3800, v0
@@ -2366,7 +2366,7 @@ define hidden amdgpu_kernel void @negativeoffset(i8 addrspace(1)* nocapture %buf
 ; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff8000, v2
 ; GFX10-NEXT:    v_lshlrev_b64 v[0:1], 3, v[0:1]
 ; GFX10-NEXT:    v_add_co_u32 v2, s0, s34, v2
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, s0, s35, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v3, null, s35, 0, s0
 ; GFX10-NEXT:    v_add_co_u32 v4, vcc_lo, v2, v0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v5, vcc_lo, v3, v1, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x800, v4

diff  --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll
index bd8b68a16a2b..50354486cf47 100644
--- a/llvm/test/CodeGen/AMDGPU/saddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddo.ll
@@ -90,7 +90,7 @@ define amdgpu_kernel void @saddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 %b
 ; GFX10-NEXT:    s_xor_b32 s2, s2, s3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s2
 ; GFX10-NEXT:    v_add_co_u32 v0, s0, s0, v0
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %sadd = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) nounwind

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
index d5d8817783fb..9391ca1fc2b1 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll
@@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
 ; MUBUF-NEXT:    s_waitcnt vmcnt(0)
 ; MUBUF-NEXT:    v_add_nc_u32_e32 v0, v2, v1
-; MUBUF-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
+; MUBUF-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
 ; MUBUF-NEXT:    buffer_store_dword v0, v0, s[36:39], 0 offen
 ; MUBUF-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
 ; MUBUF-NEXT:    s_endpgm
@@ -71,7 +71,7 @@ define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <
 ; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, vcc_lo offset:4
 ; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
 ; FLATSCR-NEXT:    v_add_nc_u32_e32 v0, v1, v0
-; FLATSCR-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
+; FLATSCR-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, 0x3039
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s0
 ; FLATSCR-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
 ; FLATSCR-NEXT:    s_endpgm

diff  --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index e4fdafe26551..ab2322d03cc8 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2788,9 +2788,9 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GFX1030-NEXT:    v_add_co_u32 v5, vcc_lo, v2, v3
 ; GFX1030-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, s4, v4, vcc_lo
 ; GFX1030-NEXT:    v_mul_hi_u32 v8, v0, v5
-; GFX1030-NEXT:    v_mad_u64_u32 v[4:5], s4, v1, v5, 0
-; GFX1030-NEXT:    v_mad_u64_u32 v[2:3], s4, v0, v6, 0
-; GFX1030-NEXT:    v_mad_u64_u32 v[6:7], s4, v1, v6, 0
+; GFX1030-NEXT:    v_mad_u64_u32 v[4:5], null, v1, v5, 0
+; GFX1030-NEXT:    v_mad_u64_u32 v[2:3], null, v0, v6, 0
+; GFX1030-NEXT:    v_mad_u64_u32 v[6:7], null, v1, v6, 0
 ; GFX1030-NEXT:    v_add_co_u32 v2, vcc_lo, v8, v2
 ; GFX1030-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
 ; GFX1030-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v4
@@ -2798,8 +2798,8 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GFX1030-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v7, vcc_lo
 ; GFX1030-NEXT:    v_add_co_u32 v5, vcc_lo, v2, v6
 ; GFX1030-NEXT:    v_add_co_ci_u32_e32 v6, vcc_lo, 0, v3, vcc_lo
-; GFX1030-NEXT:    v_mad_u64_u32 v[2:3], s4, 0x186a0, v5, 0
-; GFX1030-NEXT:    v_mad_u64_u32 v[3:4], s4, 0x186a0, v6, v[3:4]
+; GFX1030-NEXT:    v_mad_u64_u32 v[2:3], null, 0x186a0, v5, 0
+; GFX1030-NEXT:    v_mad_u64_u32 v[3:4], null, 0x186a0, v6, v[3:4]
 ; GFX1030-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2
 ; GFX1030-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
 ; GFX1030-NEXT:    v_subrev_co_u32 v2, vcc_lo, 0x186a0, v0

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index f7285051247c..13f4b92f42fd 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -476,7 +476,7 @@ define amdgpu_kernel void @livevariables_update_missed_block(i8 addrspace(1)* %s
   ; SI-NEXT: {{  $}}
   ; SI-NEXT:   [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[COPY]](p4), 36, 0 :: (dereferenceable invariant load (s64) from %ir.src1.kernarg.offset.cast, align 4, addrspace 4)
   ; SI-NEXT:   [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[S_LOAD_DWORDX2_IMM]].sub0, killed %50, 0, implicit $exec
-  ; SI-NEXT:   %43:vgpr_32, dead %45:sreg_32_xm0_xexec = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
+  ; SI-NEXT:   %43:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 0, killed [[S_LOAD_DWORDX2_IMM]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec
   ; SI-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_ADD_CO_U32_e64_]], %subreg.sub0, killed %43, %subreg.sub1
   ; SI-NEXT:   [[GLOBAL_LOAD_UBYTE:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load (s8) from %ir.i10, addrspace 1)
   ; SI-NEXT:   [[V_MOV_B:%[0-9]+]]:vreg_64 = V_MOV_B64_PSEUDO 0, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index ec91016353a6..6f1706ce4102 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -360,8 +360,8 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}test_div_scale_f32:
-; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid
@@ -377,8 +377,8 @@ define amdgpu_kernel void @test_div_scale_f32(float addrspace(1)* %out, float ad
 }
 
 ; GCN-LABEL: {{^}}test_div_scale_f64:
-; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], s{{[0-9]+}}, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
-; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+; GFX1032: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
+; GFX1064: v_div_scale_f64 v[{{[0-9:]+}}], null, v[{{[0-9:]+}}], v[{{[0-9:]+}}], v[{{[0-9:]+}}]
 define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
@@ -394,8 +394,8 @@ define amdgpu_kernel void @test_div_scale_f64(double addrspace(1)* %out, double
 }
 
 ; GCN-LABEL: {{^}}test_mad_i64_i32:
-; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
-; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1032: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1064: v_mad_i64_i32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
 define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
   %sext0 = sext i32 %arg0 to i64
   %sext1 = sext i32 %arg1 to i64
@@ -405,8 +405,8 @@ define i64 @test_mad_i64_i32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
 }
 
 ; GCN-LABEL: {{^}}test_mad_u64_u32:
-; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
-; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], s[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1032: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
+; GFX1064: v_mad_u64_u32 v[{{[0-9:]+}}], null, v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9:]+}}]
 define i64 @test_mad_u64_u32(i32 %arg0, i32 %arg1, i64 %arg2) #0 {
   %sext0 = zext i32 %arg0 to i64
   %sext1 = zext i32 %arg1 to i64
@@ -478,8 +478,8 @@ exit:
 }
 
 ; GCN-LABEL: {{^}}fdiv_f32:
-; GFX1032: v_div_scale_f32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-; GFX1064: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX1032: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+; GFX1064: v_div_scale_f32 v{{[0-9]+}}, null, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 ; GCN: v_rcp_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX1032: v_div_scale_f32 v{{[0-9]+}}, vcc_lo, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 ; GFX1064: v_div_scale_f32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}


        


More information about the llvm-commits mailing list