[llvm] 39f8a79 - AMDGPU: Try to eliminate clearing of high bits of 16-bit instructions

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 22 10:42:55 PDT 2021


Author: Matt Arsenault
Date: 2021-06-22T13:42:49-04:00
New Revision: 39f8a792f0ac4efed11ac906ba76137fc0c9f6a8

URL: https://github.com/llvm/llvm-project/commit/39f8a792f0ac4efed11ac906ba76137fc0c9f6a8
DIFF: https://github.com/llvm/llvm-project/commit/39f8a792f0ac4efed11ac906ba76137fc0c9f6a8.diff

LOG: AMDGPU: Try to eliminate clearing of high bits of 16-bit instructions

The high bits of these results used to be consistently zeroed
pre-gfx9, but gfx9 complicated the situation: some 16-bit instructions
still zero them and some now preserve them. This fold also manages to
pick up a few cases that the selection patterns fail to optimize away.

We handle some cases with instruction patterns, but some get through.
In particular, this improves the integer cases.
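
As an illustration of the new SIFoldOperands fold, here is a minimal
MIR sketch adapted from the high-bits-zeroed-16-bit-ops.mir test below
(%src and the copy destination are placeholder names): when the
defining instruction is known to zero the high 16 bits on the target,
the masking V_AND_B32 is erased and its uses are rewritten to the def.

  ; Before the fold (gfx8/gfx9, where v_cvt_f16_f32 zeroes the high bits):
  %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %src, implicit $mode, implicit $exec
  %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
  $vgpr0 = COPY %and

  ; After the fold, the AND is gone and the copy uses %op directly:
  %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %src, implicit $mode, implicit $exec
  $vgpr0 = COPY %op

On gfx10 the same input is left alone, since all 16-bit instructions
preserve the high bits there.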

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/lib/Target/AMDGPU/GCNSubtarget.h
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/test/CodeGen/AMDGPU/fmax3.ll
    llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
    llvm/test/CodeGen/AMDGPU/fmin3.ll
    llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
    llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
    llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
    llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
    llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
    llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
    llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
    llvm/test/CodeGen/AMDGPU/uaddsat.ll
    llvm/test/CodeGen/AMDGPU/usubsat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1728f4725858..a4636518522d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -353,6 +353,105 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
   return 2;
 }
 
+/// This list was mostly derived from experimentation.
+bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
+  switch (Opcode) {
+  case AMDGPU::V_CVT_F16_F32_e32:
+  case AMDGPU::V_CVT_F16_F32_e64:
+  case AMDGPU::V_CVT_F16_U16_e32:
+  case AMDGPU::V_CVT_F16_U16_e64:
+  case AMDGPU::V_CVT_F16_I16_e32:
+  case AMDGPU::V_CVT_F16_I16_e64:
+  case AMDGPU::V_RCP_F16_e64:
+  case AMDGPU::V_RCP_F16_e32:
+  case AMDGPU::V_RSQ_F16_e64:
+  case AMDGPU::V_RSQ_F16_e32:
+  case AMDGPU::V_SQRT_F16_e64:
+  case AMDGPU::V_SQRT_F16_e32:
+  case AMDGPU::V_LOG_F16_e64:
+  case AMDGPU::V_LOG_F16_e32:
+  case AMDGPU::V_EXP_F16_e64:
+  case AMDGPU::V_EXP_F16_e32:
+  case AMDGPU::V_SIN_F16_e64:
+  case AMDGPU::V_SIN_F16_e32:
+  case AMDGPU::V_COS_F16_e64:
+  case AMDGPU::V_COS_F16_e32:
+  case AMDGPU::V_FLOOR_F16_e64:
+  case AMDGPU::V_FLOOR_F16_e32:
+  case AMDGPU::V_CEIL_F16_e64:
+  case AMDGPU::V_CEIL_F16_e32:
+  case AMDGPU::V_TRUNC_F16_e64:
+  case AMDGPU::V_TRUNC_F16_e32:
+  case AMDGPU::V_RNDNE_F16_e64:
+  case AMDGPU::V_RNDNE_F16_e32:
+  case AMDGPU::V_FRACT_F16_e64:
+  case AMDGPU::V_FRACT_F16_e32:
+  case AMDGPU::V_FREXP_MANT_F16_e64:
+  case AMDGPU::V_FREXP_MANT_F16_e32:
+  case AMDGPU::V_FREXP_EXP_I16_F16_e64:
+  case AMDGPU::V_FREXP_EXP_I16_F16_e32:
+  case AMDGPU::V_LDEXP_F16_e64:
+  case AMDGPU::V_LDEXP_F16_e32:
+  case AMDGPU::V_LSHLREV_B16_e64:
+  case AMDGPU::V_LSHLREV_B16_e32:
+  case AMDGPU::V_LSHRREV_B16_e64:
+  case AMDGPU::V_LSHRREV_B16_e32:
+  case AMDGPU::V_ASHRREV_I16_e64:
+  case AMDGPU::V_ASHRREV_I16_e32:
+  case AMDGPU::V_ADD_U16_e64:
+  case AMDGPU::V_ADD_U16_e32:
+  case AMDGPU::V_SUB_U16_e64:
+  case AMDGPU::V_SUB_U16_e32:
+  case AMDGPU::V_SUBREV_U16_e64:
+  case AMDGPU::V_SUBREV_U16_e32:
+  case AMDGPU::V_MUL_LO_U16_e64:
+  case AMDGPU::V_MUL_LO_U16_e32:
+  case AMDGPU::V_ADD_F16_e64:
+  case AMDGPU::V_ADD_F16_e32:
+  case AMDGPU::V_SUB_F16_e64:
+  case AMDGPU::V_SUB_F16_e32:
+  case AMDGPU::V_SUBREV_F16_e64:
+  case AMDGPU::V_SUBREV_F16_e32:
+  case AMDGPU::V_MUL_F16_e64:
+  case AMDGPU::V_MUL_F16_e32:
+  case AMDGPU::V_MAX_F16_e64:
+  case AMDGPU::V_MAX_F16_e32:
+  case AMDGPU::V_MIN_F16_e64:
+  case AMDGPU::V_MIN_F16_e32:
+  case AMDGPU::V_MAX_U16_e64:
+  case AMDGPU::V_MAX_U16_e32:
+  case AMDGPU::V_MIN_U16_e64:
+  case AMDGPU::V_MIN_U16_e32:
+  case AMDGPU::V_MAX_I16_e64:
+  case AMDGPU::V_MAX_I16_e32:
+  case AMDGPU::V_MIN_I16_e64:
+  case AMDGPU::V_MIN_I16_e32:
+    // On gfx10, all 16-bit instructions preserve the high bits.
+    return getGeneration() <= AMDGPUSubtarget::GFX9;
+  case AMDGPU::V_MAD_F16_e64:
+  case AMDGPU::V_MADAK_F16:
+  case AMDGPU::V_MADMK_F16:
+  case AMDGPU::V_MAC_F16_e64:
+  case AMDGPU::V_MAC_F16_e32:
+  case AMDGPU::V_FMAMK_F16:
+  case AMDGPU::V_FMAAK_F16:
+  case AMDGPU::V_MAD_U16_e64:
+  case AMDGPU::V_MAD_I16_e64:
+  case AMDGPU::V_FMA_F16_e64:
+  case AMDGPU::V_FMAC_F16_e64:
+  case AMDGPU::V_FMAC_F16_e32:
+  case AMDGPU::V_DIV_FIXUP_F16_e64:
+    // In gfx9, the preferred handling of the unused high 16 bits changed. Most
+    // instructions maintain the legacy behavior of zeroing them; some changed
+    // to preserving the high bits.
+    return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
+  case AMDGPU::V_MAD_MIXLO_F16:
+  case AMDGPU::V_MAD_MIXHI_F16:
+  default:
+    return false;
+  }
+}
+
 unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
   const Function &F) const {
   if (NWaves == 1)

diff  --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 39abb00680b3..dc53568c1b9d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -286,6 +286,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   unsigned getConstantBusLimit(unsigned Opcode) const;
 
+  /// Returns true if an instruction with a 16-bit result returned in a
+  /// 32-bit register implicitly zeroes the high 16 bits, rather than
+  /// preserving the original value.
+  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
+
   bool hasIntClamp() const {
     return HasIntClamp;
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index bf02637c394a..ad910522ba90 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -91,6 +91,7 @@ class SIFoldOperands : public MachineFunctionPass {
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
 
   bool tryFoldCndMask(MachineInstr &MI) const;
+  bool tryFoldZeroHighBits(MachineInstr &MI) const;
   void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
 
   const MachineOperand *isClamp(const MachineInstr &MI) const;
@@ -1188,6 +1189,27 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
   return true;
 }
 
+bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
+  if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
+      MI.getOpcode() != AMDGPU::V_AND_B32_e32)
+    return false;
+
+  MachineOperand *Src0 = getImmOrMaterializedImm(*MRI, MI.getOperand(1));
+  if (!Src0->isImm() || Src0->getImm() != 0xffff)
+    return false;
+
+  Register Src1 = MI.getOperand(2).getReg();
+  MachineInstr *SrcDef = MRI->getVRegDef(Src1);
+  if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) {
+    Register Dst = MI.getOperand(0).getReg();
+    MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
 void SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                      MachineOperand &OpToFold) const {
   // We need mutate the operands of new mov instructions to add implicit
@@ -1721,6 +1743,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
     for (auto &MI : make_early_inc_range(*MBB)) {
       tryFoldCndMask(MI);
 
+      if (tryFoldZeroHighBits(MI))
+        continue;
+
       if (MI.isRegSequence() && tryFoldRegSequence(MI))
         continue;
 

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index 5a92eac7f32f..a3194a749be0 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -113,7 +113,7 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half ad
 ; VI-NEXT: v_max_f16_e32 v0, v2, v0
 ; VI-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-NEXT: v_max_f16_e32 v0, v0, v3
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
 ; VI-NEXT: s_setpc_b64
 
 ; GFX9: s_waitcnt

diff  --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index 8ca2d57f6ead..23d0971e2be7 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -97,7 +97,7 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
 ; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NNAN-NEXT:    v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v1
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
@@ -178,7 +178,7 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
 ; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
@@ -283,8 +283,8 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v3
 ; VI-NNAN-NEXT:    v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v2
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
+; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
@@ -437,10 +437,10 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_max_f16_e32 v1, v1, v5
 ; VI-NNAN-NEXT:    v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_max_f16_e32 v0, v0, v4
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
+; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
+; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
+; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:

diff  --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 0c3b04d55d24..f891b326708e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half ad
 ; VI-NEXT: v_min_f16_e32 v0, v2, v0
 ; VI-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; VI-NEXT: v_min_f16_e32 v0, v0, v3
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
 ; VI-NEXT: s_setpc_b64
 
 ; GFX9: s_waitcnt

diff  --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index bf4b93cd4dfc..22773ac06c12 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -98,7 +98,7 @@ define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
 ; VI-NNAN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; VI-NNAN-NEXT:    v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_min_f16_e32 v0, v0, v1
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
@@ -179,7 +179,7 @@ define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_min_f16_e32 v0, v0, v2
 ; VI-NNAN-NEXT:    v_min_f16_e32 v1, v1, v3
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v4
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
@@ -284,8 +284,8 @@ define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_min_f16_e32 v1, v1, v3
 ; VI-NNAN-NEXT:    v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_min_f16_e32 v0, v0, v2
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v5
+; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v4
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
@@ -438,10 +438,10 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; VI-NNAN-NEXT:    v_min_f16_e32 v1, v1, v5
 ; VI-NNAN-NEXT:    v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI-NNAN-NEXT:    v_min_f16_e32 v0, v0, v4
-; VI-NNAN-NEXT:    v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT:    v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT:    v_or_b32_e32 v0, v0, v11
+; VI-NNAN-NEXT:    v_or_b32_e32 v1, v1, v10
+; VI-NNAN-NEXT:    v_or_b32_e32 v2, v2, v9
+; VI-NNAN-NEXT:    v_or_b32_e32 v3, v3, v8
 ; VI-NNAN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:

diff  --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index abdfd2c9c677..469cfe96fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -73,8 +73,7 @@ entry:
 ; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
 
 ; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
-; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
-; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]
+; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_0]]
 
 ; GCN: buffer_store_dword v[[R_V2_F16]]
 
@@ -141,7 +140,7 @@ entry:
 ; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
 ; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
 ; SIVI-NOT: v[[R_F16]]
-; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
+; GFX9-NOT: v_and_b32
 ; GCN: buffer_store_dword v[[R_F16]]
 define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
     i32 addrspace(1)* %r,
@@ -159,7 +158,7 @@ entry:
 ; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
 ; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
 ; SIVI-NOT: v[[R_F16]]
-; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
+; GFX9-NOT: v_and_b32
 ; GCN: buffer_store_dword v[[R_F16]]
 define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
     i32 addrspace(1)* %r,

diff  --git a/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir b/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
index 769450629653..b6aa5f670a58 100644
--- a/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
@@ -14,26 +14,20 @@ body:             |
     ; GFX8: liveins: $vgpr0
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
     ; GFX8: %smask:sreg_32 = S_MOV_B32 65535
-    ; GFX8: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
     ; GFX8: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
-    ; GFX8: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and0
-    ; GFX8: $vgpr1 = COPY %and1
-    ; GFX8: $vgpr2 = COPY %and2
+    ; GFX8: $vgpr0 = COPY %op
+    ; GFX8: $vgpr1 = COPY %op
+    ; GFX8: $vgpr2 = COPY %op
     ; GFX9-LABEL: name: v_cvt_f16_f32_altmask
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
-    ; GFX9: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
     ; GFX9: %smask:sreg_32 = S_MOV_B32 65535
-    ; GFX9: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
     ; GFX9: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
-    ; GFX9: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and0
-    ; GFX9: $vgpr1 = COPY %and1
-    ; GFX9: $vgpr2 = COPY %and2
+    ; GFX9: $vgpr0 = COPY %op
+    ; GFX9: $vgpr1 = COPY %op
+    ; GFX9: $vgpr2 = COPY %op
     ; GFX10-LABEL: name: v_cvt_f16_f32_altmask
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -103,19 +97,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_cvt_f16_f32
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_cvt_f16_f32
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -146,19 +136,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_cvt_f16_u16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_cvt_f16_u16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -189,19 +175,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_cvt_f16_i16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_cvt_f16_i16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -232,19 +214,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_rcp_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_rcp_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -274,19 +252,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_rsq_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_rsq_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -316,19 +290,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_sqrt_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_sqrt_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -359,19 +329,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_log_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_log_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -402,19 +368,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_exp_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_exp_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -445,19 +407,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_sin_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_sin_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -488,19 +446,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_cos_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_cos_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -531,19 +485,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_floor_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_floor_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -574,19 +524,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_ceil_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_ceil_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -617,19 +563,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_trunc_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_trunc_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -660,19 +602,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_rndne_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_rndne_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -703,19 +641,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_fract_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_fract_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -746,19 +680,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_frexp_mant_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_frexp_mant_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -789,19 +719,15 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop1
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_frexp_exp_f16
     ; GFX9: liveins: $vgpr0
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop1
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_frexp_exp_f16
     ; GFX10: liveins: $vgpr0
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -833,20 +759,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_ldexp_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_ldexp_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -880,20 +802,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_lshlrev_b16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_lshlrev_b16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -927,20 +845,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_lshrrev_b16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_lshrrev_b16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -974,20 +888,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_ashrrev_i16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_ashrrev_i16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1021,20 +931,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_add_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_add_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1068,20 +974,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_sub_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_sub_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1115,20 +1017,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_subrev_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_subrev_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1162,20 +1060,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_mul_lo_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_mul_lo_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1209,20 +1103,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_add_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_add_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1256,20 +1146,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_sub_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_sub_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1303,20 +1189,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_subrev_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_subrev_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1350,20 +1232,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_mul_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_mul_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1397,20 +1275,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_max_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_max_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1444,20 +1318,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop1
     ; GFX9-LABEL: name: v_min_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
     ; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop1
     ; GFX10-LABEL: name: v_min_f16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1491,20 +1361,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_max_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_max_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1537,20 +1403,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_min_u16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_min_u16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1584,20 +1446,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_max_i16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_max_i16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1630,20 +1488,16 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop3
-    ; GFX8: $vgpr1 = COPY %and_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
+    ; GFX8: $vgpr1 = COPY %op_vop2
     ; GFX9-LABEL: name: v_min_i16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
     ; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
-    ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX9: $vgpr0 = COPY %and_vop3
-    ; GFX9: $vgpr1 = COPY %and_vop2
+    ; GFX9: $vgpr0 = COPY %op_vop3
+    ; GFX9: $vgpr1 = COPY %op_vop2
     ; GFX10-LABEL: name: v_min_i16
     ; GFX10: liveins: $vgpr0, $vgpr1
     ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1677,8 +1531,7 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX8: %op:vgpr_32 = nofpexcept V_MAD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_mad_f16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1716,8 +1569,7 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX8: %op:vgpr_32 = nofpexcept V_FMA_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_fma_f16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1755,8 +1607,7 @@ body:             |
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX8: %op:vgpr_32 = nofpexcept V_DIV_FIXUP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_div_fixup_f16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1793,8 +1644,7 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op:vgpr_32 = nofpexcept V_MADAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_madak_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1828,8 +1678,7 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op:vgpr_32 = nofpexcept V_MADMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_madmk_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1863,8 +1712,7 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op:vgpr_32 = nofpexcept V_FMAAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_fmaak_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1898,8 +1746,7 @@ body:             |
     ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX8: %op:vgpr_32 = nofpexcept V_FMAMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
-    ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and
+    ; GFX8: $vgpr0 = COPY %op
     ; GFX9-LABEL: name: v_fmamk_f16
     ; GFX9: liveins: $vgpr0, $vgpr1
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1935,10 +1782,8 @@ body:             |
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop2
-    ; GFX8: $vgpr0 = COPY %and_vop3
+    ; GFX8: $vgpr0 = COPY %op_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
     ; GFX9-LABEL: name: v_mac_f16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1986,10 +1831,8 @@ body:             |
     ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX8: %op_vop2:vgpr_32 = nofpexcept V_FMAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
     ; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FMAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
-    ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
-    ; GFX8: $vgpr0 = COPY %and_vop2
-    ; GFX8: $vgpr0 = COPY %and_vop3
+    ; GFX8: $vgpr0 = COPY %op_vop2
+    ; GFX8: $vgpr0 = COPY %op_vop3
     ; GFX9-LABEL: name: v_fmac_f16
     ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
     ; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
index dc3eb4ce191e..ee07678e1b3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
@@ -35,8 +35,7 @@ entry:
 ; GCN-LABEL: {{^}}frexp_exp_f16_zext
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
-; VI:  v_and_b32_e32 v[[R_I32:[0-9]+]], 0xffff, v[[R_I16]]
-; GCN: buffer_store_dword v[[R_I32]]
+; GCN: buffer_store_dword v[[R_I16]]
 define amdgpu_kernel void @frexp_exp_f16_zext(
     i32 addrspace(1)* %r,
     half addrspace(1)* %a) {

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 7229c9959f85..20d86f53547c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -350,7 +350,7 @@ define amdgpu_kernel void @maxnum_v2f16(
 ; VI-NEXT:    v_max_f16_e64 v1, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -438,7 +438,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
 ; VI-NEXT:    v_max_f16_e64 v1, s4, s4
 ; VI-NEXT:    v_max_f16_e32 v0, 0x4200, v0
 ; VI-NEXT:    v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -518,7 +518,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
 ; VI-NEXT:    v_max_f16_e64 v1, s4, s4
 ; VI-NEXT:    v_max_f16_e32 v0, 4.0, v0
 ; VI-NEXT:    v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -619,7 +619,7 @@ define amdgpu_kernel void @maxnum_v3f16(
 ; VI-NEXT:    v_max_f16_e64 v1, s6, s6
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    v_max_f16_e64 v1, s7, s7
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_max_f16_e32 v1, v2, v1
@@ -749,7 +749,7 @@ define amdgpu_kernel void @maxnum_v4f16(
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v1, s7, s7
 ; VI-NEXT:    v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v1, v0, v1
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_max_f16_e64 v0, s6, s6
 ; VI-NEXT:    s_lshr_b32 s4, s4, 16
@@ -758,7 +758,7 @@ define amdgpu_kernel void @maxnum_v4f16(
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v3, s4, s4
 ; VI-NEXT:    v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -867,12 +867,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
 ; VI-NEXT:    v_max_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NEXT:    v_max_f16_e32 v1, 0x4200, v1
 ; VI-NEXT:    s_lshr_b32 s4, s4, 16
-; VI-NEXT:    v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v1, v1, v0
 ; VI-NEXT:    v_max_f16_e32 v0, 0x4800, v2
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, 0x4000
 ; VI-NEXT:    v_max_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 727ac7020e47..02130936cd82 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -378,7 +378,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
 ; VI-NEXT:    v_max_f16_e64 v1, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -501,7 +501,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
 ; VI-NEXT:    v_max_f16_e64 v1, s4, s4
 ; VI-NEXT:    v_min_f16_e32 v0, 0x4200, v0
 ; VI-NEXT:    v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -581,7 +581,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
 ; VI-NEXT:    v_max_f16_e64 v1, s4, s4
 ; VI-NEXT:    v_min_f16_e32 v0, 4.0, v0
 ; VI-NEXT:    v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -682,7 +682,7 @@ define amdgpu_kernel void @minnum_v3f16(
 ; VI-NEXT:    v_max_f16_e64 v1, s6, s6
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v1
 ; VI-NEXT:    v_max_f16_e64 v1, s7, s7
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_min_f16_e32 v1, v2, v1
@@ -812,7 +812,7 @@ define amdgpu_kernel void @minnum_v4f16(
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v1, s7, s7
 ; VI-NEXT:    v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v1, v0, v1
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_max_f16_e64 v0, s6, s6
 ; VI-NEXT:    s_lshr_b32 s4, s4, 16
@@ -821,7 +821,7 @@ define amdgpu_kernel void @minnum_v4f16(
 ; VI-NEXT:    v_max_f16_e64 v2, s5, s5
 ; VI-NEXT:    v_max_f16_e64 v3, s4, s4
 ; VI-NEXT:    v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
@@ -930,12 +930,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
 ; VI-NEXT:    v_min_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; VI-NEXT:    v_min_f16_e32 v1, 0x4200, v1
 ; VI-NEXT:    s_lshr_b32 s4, s4, 16
-; VI-NEXT:    v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v1, v1, v0
 ; VI-NEXT:    v_min_f16_e32 v0, 0x4800, v2
 ; VI-NEXT:    v_max_f16_e64 v2, s4, s4
 ; VI-NEXT:    v_mov_b32_e32 v3, 0x4000
 ; VI-NEXT:    v_min_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_e32 v0, v0, v2
 ; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;

diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index ed2202c51028..789aa514695e 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -248,7 +248,7 @@ define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
 ; GFX8-NEXT: s_setpc_b64
 
 ; GFX9: v_cvt_f16_f32_e32 v0, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_setpc_b64
 
 ; GFX10: v_cvt_f16_f32_e32 v0, v0
 ; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -260,9 +260,13 @@ define i32 @zext_fptrunc_f16(float %x) {
 }
 
 ; GCN-LABEL: {{^}}zext_fptrunc_fma_f16:
+; GFX8: v_fma_f32 v0, v0, v1, v2
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64
+
 ; GFX900: v_fma_f32 v0, v0, v1, v2
 ; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: s_setpc_b64
 
 ; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2
 ; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0

diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index d504b1901303..ec3f62238f3f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -67,7 +67,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_strict:
@@ -92,7 +92,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_ignore:
@@ -117,7 +117,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap:
@@ -143,7 +143,7 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    v_add_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -164,12 +164,9 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_add_f16_e32 v1, v1, v3
 ; GFX9-NEXT:    v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    v_add_f16_e32 v1, v1, v3
-; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v5, 16, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -181,8 +178,8 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX8-NEXT:    v_add_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_add_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
@@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half>
 ; GFX8-NEXT:    v_add_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_add_f16_e32 v1, s2, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_constained_fadd_v2f16_fpexcept_strict:

diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
index 110e65144e0d..0aa92534c437 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
@@ -37,7 +37,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha
 ; GFX8-NEXT:    v_fma_f16 v3, v5, v4, v3
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict:
@@ -67,7 +67,7 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
 ; GFX8-NEXT:    v_fma_f16 v6, v8, v7, v6
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
 ; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v6
 ; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -117,10 +117,10 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
 ; GFX8-NEXT:    v_fma_f16 v7, v9, v8, v7
 ; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict:
@@ -221,7 +221,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %
 ; GFX8-NEXT:    v_fma_f16 v3, -v5, -v4, v3
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX8-NEXT:    v_fma_f16 v0, -v0, -v1, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:

diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index de3a40b50a41..4019e39df83f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -67,7 +67,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
@@ -92,7 +92,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
@@ -117,7 +117,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
@@ -143,7 +143,7 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_mul_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -164,12 +164,9 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v3
 ; GFX9-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    v_mul_f16_e32 v1, v1, v3
-; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v5, 16, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -181,8 +178,8 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX8-NEXT:    v_mul_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_mul_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
@@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
 ; GFX8-NEXT:    v_mul_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_mul_f16_e32 v1, s2, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_constained_fmul_v2f16_fpexcept_strict:

diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index ddbf4f3727e4..73e2b5540865 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -61,7 +61,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -70,7 +69,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
@@ -92,7 +91,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -101,7 +99,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
@@ -123,7 +121,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -132,7 +129,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
@@ -154,7 +151,6 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v4, 16, v0
 ; GFX9-NEXT:    v_sub_f16_e32 v1, v1, v3
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -164,7 +160,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    v_sub_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -188,12 +184,9 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT:    v_sub_f16_e32 v1, v1, v3
 ; GFX9-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    v_sub_f16_e32 v1, v1, v3
-; GFX9-NEXT:    v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT:    v_and_b32_e32 v1, v2, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v5, 16, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
@@ -205,8 +198,8 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
 ; GFX8-NEXT:    v_sub_f16_e32 v1, v1, v3
 ; GFX8-NEXT:    v_sub_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_f16_e32 v0, v0, v2
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
@@ -245,13 +238,12 @@ define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half
 define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
 ; GFX9-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    s_lshr_b32 s0, s3, 16
-; GFX9-NEXT:    v_sub_f16_e32 v1, s2, v1
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    s_lshr_b32 s1, s2, 16
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_sub_f16_e32 v0, s1, v0
-; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    v_sub_f16_e32 v1, s2, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
@@ -264,7 +256,7 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
 ; GFX8-NEXT:    v_sub_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8-NEXT:    v_sub_f16_e32 v1, s2, v1
-; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: s_constained_fsub_v2f16_fpexcept_strict:

diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 5082772fae98..3a50f89dbaba 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -104,7 +104,7 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_add_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_uaddsat_v2i16:
@@ -145,7 +145,7 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
 ; GFX8-NEXT:    v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
 ; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_uaddsat_v3i16:
@@ -192,8 +192,8 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
 ; GFX8-NEXT:    v_add_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_uaddsat_v4i16:

diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 49daf7108d73..c1062c82ba5a 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -126,7 +126,7 @@ define <2 x i16> @v_usubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_sub_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v1 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_usubsat_v2i16:
@@ -173,7 +173,7 @@ define <3 x i16> @v_usubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
 ; GFX8-NEXT:    v_sub_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v2 clamp
 ; GFX8-NEXT:    v_sub_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_usubsat_v3i16:
@@ -228,8 +228,8 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v2 clamp
 ; GFX8-NEXT:    v_sub_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; GFX8-NEXT:    v_sub_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
+; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX9-LABEL: v_usubsat_v4i16:


        

