[llvm] 39f8a79 - AMDGPU: Try to eliminate clearing of high bits of 16-bit instructions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 22 10:42:55 PDT 2021
Author: Matt Arsenault
Date: 2021-06-22T13:42:49-04:00
New Revision: 39f8a792f0ac4efed11ac906ba76137fc0c9f6a8
URL: https://github.com/llvm/llvm-project/commit/39f8a792f0ac4efed11ac906ba76137fc0c9f6a8
DIFF: https://github.com/llvm/llvm-project/commit/39f8a792f0ac4efed11ac906ba76137fc0c9f6a8.diff
LOG: AMDGPU: Try to eliminate clearing of high bits of 16-bit instructions
These used to consistently be zeroed pre-gfx9, but gfx9 made the
situation complicated, since now some still do and some don't. This
also manages to pick up a few cases that the pattern fails to optimize
away.
We handle some cases with instruction patterns, but some get
through. In particular, this improves the integer cases.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/test/CodeGen/AMDGPU/fmax3.ll
llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
llvm/test/CodeGen/AMDGPU/fmin3.ll
llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
llvm/test/CodeGen/AMDGPU/uaddsat.ll
llvm/test/CodeGen/AMDGPU/usubsat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1728f4725858..a4636518522d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -353,6 +353,105 @@ unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
return 2;
}
+/// This list was mostly derived from experimentation.
+bool GCNSubtarget::zeroesHigh16BitsOfDest(unsigned Opcode) const {
+ switch (Opcode) {
+ case AMDGPU::V_CVT_F16_F32_e32:
+ case AMDGPU::V_CVT_F16_F32_e64:
+ case AMDGPU::V_CVT_F16_U16_e32:
+ case AMDGPU::V_CVT_F16_U16_e64:
+ case AMDGPU::V_CVT_F16_I16_e32:
+ case AMDGPU::V_CVT_F16_I16_e64:
+ case AMDGPU::V_RCP_F16_e64:
+ case AMDGPU::V_RCP_F16_e32:
+ case AMDGPU::V_RSQ_F16_e64:
+ case AMDGPU::V_RSQ_F16_e32:
+ case AMDGPU::V_SQRT_F16_e64:
+ case AMDGPU::V_SQRT_F16_e32:
+ case AMDGPU::V_LOG_F16_e64:
+ case AMDGPU::V_LOG_F16_e32:
+ case AMDGPU::V_EXP_F16_e64:
+ case AMDGPU::V_EXP_F16_e32:
+ case AMDGPU::V_SIN_F16_e64:
+ case AMDGPU::V_SIN_F16_e32:
+ case AMDGPU::V_COS_F16_e64:
+ case AMDGPU::V_COS_F16_e32:
+ case AMDGPU::V_FLOOR_F16_e64:
+ case AMDGPU::V_FLOOR_F16_e32:
+ case AMDGPU::V_CEIL_F16_e64:
+ case AMDGPU::V_CEIL_F16_e32:
+ case AMDGPU::V_TRUNC_F16_e64:
+ case AMDGPU::V_TRUNC_F16_e32:
+ case AMDGPU::V_RNDNE_F16_e64:
+ case AMDGPU::V_RNDNE_F16_e32:
+ case AMDGPU::V_FRACT_F16_e64:
+ case AMDGPU::V_FRACT_F16_e32:
+ case AMDGPU::V_FREXP_MANT_F16_e64:
+ case AMDGPU::V_FREXP_MANT_F16_e32:
+ case AMDGPU::V_FREXP_EXP_I16_F16_e64:
+ case AMDGPU::V_FREXP_EXP_I16_F16_e32:
+ case AMDGPU::V_LDEXP_F16_e64:
+ case AMDGPU::V_LDEXP_F16_e32:
+ case AMDGPU::V_LSHLREV_B16_e64:
+ case AMDGPU::V_LSHLREV_B16_e32:
+ case AMDGPU::V_LSHRREV_B16_e64:
+ case AMDGPU::V_LSHRREV_B16_e32:
+ case AMDGPU::V_ASHRREV_I16_e64:
+ case AMDGPU::V_ASHRREV_I16_e32:
+ case AMDGPU::V_ADD_U16_e64:
+ case AMDGPU::V_ADD_U16_e32:
+ case AMDGPU::V_SUB_U16_e64:
+ case AMDGPU::V_SUB_U16_e32:
+ case AMDGPU::V_SUBREV_U16_e64:
+ case AMDGPU::V_SUBREV_U16_e32:
+ case AMDGPU::V_MUL_LO_U16_e64:
+ case AMDGPU::V_MUL_LO_U16_e32:
+ case AMDGPU::V_ADD_F16_e64:
+ case AMDGPU::V_ADD_F16_e32:
+ case AMDGPU::V_SUB_F16_e64:
+ case AMDGPU::V_SUB_F16_e32:
+ case AMDGPU::V_SUBREV_F16_e64:
+ case AMDGPU::V_SUBREV_F16_e32:
+ case AMDGPU::V_MUL_F16_e64:
+ case AMDGPU::V_MUL_F16_e32:
+ case AMDGPU::V_MAX_F16_e64:
+ case AMDGPU::V_MAX_F16_e32:
+ case AMDGPU::V_MIN_F16_e64:
+ case AMDGPU::V_MIN_F16_e32:
+ case AMDGPU::V_MAX_U16_e64:
+ case AMDGPU::V_MAX_U16_e32:
+ case AMDGPU::V_MIN_U16_e64:
+ case AMDGPU::V_MIN_U16_e32:
+ case AMDGPU::V_MAX_I16_e64:
+ case AMDGPU::V_MAX_I16_e32:
+ case AMDGPU::V_MIN_I16_e64:
+ case AMDGPU::V_MIN_I16_e32:
+ // On gfx10, all 16-bit instructions preserve the high bits.
+ return getGeneration() <= AMDGPUSubtarget::GFX9;
+ case AMDGPU::V_MAD_F16_e64:
+ case AMDGPU::V_MADAK_F16:
+ case AMDGPU::V_MADMK_F16:
+ case AMDGPU::V_MAC_F16_e64:
+ case AMDGPU::V_MAC_F16_e32:
+ case AMDGPU::V_FMAMK_F16:
+ case AMDGPU::V_FMAAK_F16:
+ case AMDGPU::V_MAD_U16_e64:
+ case AMDGPU::V_MAD_I16_e64:
+ case AMDGPU::V_FMA_F16_e64:
+ case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F16_e32:
+ case AMDGPU::V_DIV_FIXUP_F16_e64:
+ // In gfx9, the preferred handling of the unused high 16-bits changed. Most
+ // instructions maintain the legacy behavior of 0ing. Some instructions
+ // changed to preserving the high bits.
+ return getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
+ case AMDGPU::V_MAD_MIXLO_F16:
+ case AMDGPU::V_MAD_MIXHI_F16:
+ default:
+ return false;
+ }
+}
+
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 39abb00680b3..dc53568c1b9d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -286,6 +286,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
unsigned getConstantBusLimit(unsigned Opcode) const;
+ /// Returns if the result of this instruction with a 16-bit result returned in
+ /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve
+ /// the original value.
+ bool zeroesHigh16BitsOfDest(unsigned Opcode) const;
+
bool hasIntClamp() const {
return HasIntClamp;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index bf02637c394a..ad910522ba90 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -91,6 +91,7 @@ class SIFoldOperands : public MachineFunctionPass {
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
bool tryFoldCndMask(MachineInstr &MI) const;
+ bool tryFoldZeroHighBits(MachineInstr &MI) const;
void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
const MachineOperand *isClamp(const MachineInstr &MI) const;
@@ -1188,6 +1189,27 @@ bool SIFoldOperands::tryFoldCndMask(MachineInstr &MI) const {
return true;
}
+bool SIFoldOperands::tryFoldZeroHighBits(MachineInstr &MI) const {
+ if (MI.getOpcode() != AMDGPU::V_AND_B32_e64 &&
+ MI.getOpcode() != AMDGPU::V_AND_B32_e32)
+ return false;
+
+ MachineOperand *Src0 = getImmOrMaterializedImm(*MRI, MI.getOperand(1));
+ if (!Src0->isImm() || Src0->getImm() != 0xffff)
+ return false;
+
+ Register Src1 = MI.getOperand(2).getReg();
+ MachineInstr *SrcDef = MRI->getVRegDef(Src1);
+ if (ST->zeroesHigh16BitsOfDest(SrcDef->getOpcode())) {
+ Register Dst = MI.getOperand(0).getReg();
+ MRI->replaceRegWith(Dst, SrcDef->getOperand(0).getReg());
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
void SIFoldOperands::foldInstOperand(MachineInstr &MI,
MachineOperand &OpToFold) const {
// We need mutate the operands of new mov instructions to add implicit
@@ -1721,6 +1743,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (auto &MI : make_early_inc_range(*MBB)) {
tryFoldCndMask(MI);
+ if (tryFoldZeroHighBits(MI))
+ continue;
+
if (MI.isRegSequence() && tryFoldRegSequence(MI))
continue;
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index 5a92eac7f32f..a3194a749be0 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -113,7 +113,7 @@ define amdgpu_kernel void @test_fmax3_olt_1_f16(half addrspace(1)* %out, half ad
; VI-NEXT: v_max_f16_e32 v0, v2, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_max_f16_e32 v0, v0, v3
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64
; GFX9: s_waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
index 8ca2d57f6ead..23d0971e2be7 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
@@ -97,7 +97,7 @@ define <2 x half> @test_fmax_legacy_ugt_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_max_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v1
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v2f16:
@@ -178,7 +178,7 @@ define <3 x half> @test_fmax_legacy_ugt_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
@@ -283,8 +283,8 @@ define <4 x half> @test_fmax_legacy_ugt_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_max_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v2
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
+; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v4f16:
@@ -437,10 +437,10 @@ define <8 x half> @test_fmax_legacy_ugt_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; VI-NNAN-NEXT: v_max_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_max_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_max_f16_e32 v0, v0, v4
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
+; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
+; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
+; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmax_legacy_ugt_v8f16:
diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 0c3b04d55d24..f891b326708e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -102,7 +102,7 @@ define amdgpu_kernel void @test_fmin3_olt_1_f16(half addrspace(1)* %out, half ad
; VI-NEXT: v_min_f16_e32 v0, v2, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-NEXT: v_min_f16_e32 v0, v0, v3
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: s_setpc_b64
; GFX9: s_waitcnt
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
index bf4b93cd4dfc..22773ac06c12 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
@@ -98,7 +98,7 @@ define <2 x half> @test_fmin_legacy_ule_v2f16(<2 x half> %a, <2 x half> %b) #0 {
; VI-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NNAN-NEXT: v_min_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v1
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v2f16:
@@ -179,7 +179,7 @@ define <3 x half> @test_fmin_legacy_ule_v3f16(<3 x half> %a, <3 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
@@ -284,8 +284,8 @@ define <4 x half> @test_fmin_legacy_ule_v4f16(<4 x half> %a, <4 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v3
; VI-NNAN-NEXT: v_min_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v2
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v5
+; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v4
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v4f16:
@@ -438,10 +438,10 @@ define <8 x half> @test_fmin_legacy_ule_v8f16(<8 x half> %a, <8 x half> %b) #0 {
; VI-NNAN-NEXT: v_min_f16_e32 v1, v1, v5
; VI-NNAN-NEXT: v_min_f16_sdwa v11, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NNAN-NEXT: v_min_f16_e32 v0, v0, v4
-; VI-NNAN-NEXT: v_or_b32_sdwa v0, v0, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NNAN-NEXT: v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NNAN-NEXT: v_or_b32_e32 v0, v0, v11
+; VI-NNAN-NEXT: v_or_b32_e32 v1, v1, v10
+; VI-NNAN-NEXT: v_or_b32_e32 v2, v2, v9
+; VI-NNAN-NEXT: v_or_b32_e32 v3, v3, v8
; VI-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; SI-SAFE-LABEL: test_fmin_legacy_ule_v8f16:
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index abdfd2c9c677..469cfe96fb6d 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -73,8 +73,7 @@ entry:
; SIVI: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_0]], v[[R_F16_HI]]
; GFX9-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
-; GFX9: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
-; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_LO]]
+; GFX9: v_lshl_or_b32 v[[R_V2_F16:[0-9]+]], v[[R_F16_1]], 16, v[[R_F16_0]]
; GCN: buffer_store_dword v[[R_V2_F16]]
@@ -141,7 +140,7 @@ entry:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; SIVI-NOT: v[[R_F16]]
-; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
+; GFX9-NOT: v_and_b32
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,
@@ -159,7 +158,7 @@ entry:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; SIVI-NOT: v[[R_F16]]
-; GFX9-NEXT: v_and_b32_e32 v[[R_F16]], 0xffff, v[[R_F16]]
+; GFX9-NOT: v_and_b32
; GCN: buffer_store_dword v[[R_F16]]
define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
i32 addrspace(1)* %r,
diff --git a/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir b/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
index 769450629653..b6aa5f670a58 100644
--- a/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
+++ b/llvm/test/CodeGen/AMDGPU/high-bits-zeroed-16-bit-ops.mir
@@ -14,26 +14,20 @@ body: |
; GFX8: liveins: $vgpr0
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX8: %smask:sreg_32 = S_MOV_B32 65535
- ; GFX8: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
; GFX8: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
- ; GFX8: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and0
- ; GFX8: $vgpr1 = COPY %and1
- ; GFX8: $vgpr2 = COPY %and2
+ ; GFX8: $vgpr0 = COPY %op
+ ; GFX8: $vgpr1 = COPY %op
+ ; GFX8: $vgpr2 = COPY %op
; GFX9-LABEL: name: v_cvt_f16_f32_altmask
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
- ; GFX9: %and0:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
; GFX9: %smask:sreg_32 = S_MOV_B32 65535
- ; GFX9: %and1:vgpr_32 = V_AND_B32_e64 %smask, %op, implicit $exec
; GFX9: %vmask:vgpr_32 = V_MOV_B32_e32 65535, implicit $exec
- ; GFX9: %and2:vgpr_32 = V_AND_B32_e64 %vmask, %op, implicit $exec
- ; GFX9: $vgpr0 = COPY %and0
- ; GFX9: $vgpr1 = COPY %and1
- ; GFX9: $vgpr2 = COPY %and2
+ ; GFX9: $vgpr0 = COPY %op
+ ; GFX9: $vgpr1 = COPY %op
+ ; GFX9: $vgpr2 = COPY %op
; GFX10-LABEL: name: v_cvt_f16_f32_altmask
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -103,19 +97,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_f32
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_F32_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_f32
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -146,19 +136,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_u16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_U16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_U16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_u16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -189,19 +175,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cvt_f16_i16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CVT_F16_I16_e64 [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CVT_F16_I16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cvt_f16_i16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -232,19 +214,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rcp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RCP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RCP_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rcp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -274,19 +252,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rsq_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RSQ_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RSQ_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rsq_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -316,19 +290,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sqrt_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SQRT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SQRT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sqrt_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -359,19 +329,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_log_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LOG_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LOG_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_log_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -402,19 +368,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_exp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_EXP_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_EXP_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_exp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -445,19 +407,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sin_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SIN_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_SIN_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sin_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -488,19 +446,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_cos_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_COS_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_COS_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_cos_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -531,19 +485,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_floor_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FLOOR_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FLOOR_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_floor_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -574,19 +524,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_ceil_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_CEIL_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_CEIL_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_ceil_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -617,19 +563,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_trunc_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_TRUNC_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_TRUNC_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_trunc_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -660,19 +602,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_rndne_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_RNDNE_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_RNDNE_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_rndne_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -703,19 +641,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_fract_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FRACT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FRACT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_fract_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -746,19 +680,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_frexp_mant_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_MANT_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_frexp_mant_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -789,19 +719,15 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop1
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_frexp_exp_f16
; GFX9: liveins: $vgpr0
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e64 0, [[COPY]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_FREXP_EXP_I16_F16_e32 [[COPY]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop1:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop1
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_frexp_exp_f16
; GFX10: liveins: $vgpr0
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -833,20 +759,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_ldexp_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LDEXP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_ldexp_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -880,20 +802,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_lshlrev_b16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHLREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHLREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_lshlrev_b16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -927,20 +845,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_lshrrev_b16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_LSHRREV_B16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_LSHRREV_B16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_lshrrev_b16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -974,20 +888,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_ashrrev_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ASHRREV_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ASHRREV_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_ashrrev_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1021,20 +931,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_add_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_ADD_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_add_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1068,20 +974,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_sub_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUB_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUB_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_sub_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1115,20 +1017,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_subrev_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_SUBREV_U16_e64 [[COPY]], [[COPY1]], 0, implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_SUBREV_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_subrev_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1162,20 +1060,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_mul_lo_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MUL_LO_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MUL_LO_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_mul_lo_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1209,20 +1103,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_add_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_add_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1256,20 +1146,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_sub_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_sub_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1303,20 +1189,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_subrev_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_subrev_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1350,20 +1232,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_mul_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_mul_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1397,20 +1275,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_max_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_max_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1444,20 +1318,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX8: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop1
; GFX9-LABEL: name: v_min_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_ADD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec
; GFX9: %op_vop1:vgpr_32 = nofpexcept V_ADD_F16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop1, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop1
; GFX10-LABEL: name: v_min_f16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1491,20 +1361,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_max_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_max_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1537,20 +1403,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_min_u16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_U16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_min_u16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1584,20 +1446,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_max_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MAX_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_max_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1630,20 +1488,16 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop3
- ; GFX8: $vgpr1 = COPY %and_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
+ ; GFX8: $vgpr1 = COPY %op_vop2
; GFX9-LABEL: name: v_min_i16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX9: %op_vop3:vgpr_32 = nofpexcept V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
; GFX9: %op_vop2:vgpr_32 = nofpexcept V_MIN_I16_e32 [[COPY]], [[COPY1]], implicit $mode, implicit $exec
- ; GFX9: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX9: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX9: $vgpr0 = COPY %and_vop3
- ; GFX9: $vgpr1 = COPY %and_vop2
+ ; GFX9: $vgpr0 = COPY %op_vop3
+ ; GFX9: $vgpr1 = COPY %op_vop2
; GFX10-LABEL: name: v_min_i16
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1677,8 +1531,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_MAD_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_mad_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1716,8 +1569,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_FMA_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fma_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1755,8 +1607,7 @@ body: |
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op:vgpr_32 = nofpexcept V_DIV_FIXUP_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_div_fixup_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1793,8 +1644,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_MADAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_madak_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1828,8 +1678,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_MADMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_madmk_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1863,8 +1712,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_FMAAK_F16 [[COPY]], [[COPY1]], 1234, implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fmaak_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1898,8 +1746,7 @@ body: |
; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX8: %op:vgpr_32 = nofpexcept V_FMAMK_F16 [[COPY]], 1234, [[COPY1]], implicit $mode, implicit $exec
- ; GFX8: %and:vgpr_32 = V_AND_B32_e32 65535, %op, implicit $exec
- ; GFX8: $vgpr0 = COPY %and
+ ; GFX8: $vgpr0 = COPY %op
; GFX9-LABEL: name: v_fmamk_f16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1935,10 +1782,8 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_MAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_MAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop2
- ; GFX8: $vgpr0 = COPY %and_vop3
+ ; GFX8: $vgpr0 = COPY %op_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
; GFX9-LABEL: name: v_mac_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
@@ -1986,10 +1831,8 @@ body: |
; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX8: %op_vop2:vgpr_32 = nofpexcept V_FMAC_F16_e32 [[COPY]], [[COPY1]], [[COPY2]], implicit $mode, implicit $exec
; GFX8: %op_vop3:vgpr_32 = nofpexcept V_FMAC_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
- ; GFX8: %and_vop2:vgpr_32 = V_AND_B32_e32 65535, %op_vop2, implicit $exec
- ; GFX8: %and_vop3:vgpr_32 = V_AND_B32_e32 65535, %op_vop3, implicit $exec
- ; GFX8: $vgpr0 = COPY %and_vop2
- ; GFX8: $vgpr0 = COPY %and_vop3
+ ; GFX8: $vgpr0 = COPY %op_vop2
+ ; GFX8: $vgpr0 = COPY %op_vop3
; GFX9-LABEL: name: v_fmac_f16
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
index dc3eb4ce191e..ee07678e1b3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
@@ -35,8 +35,7 @@ entry:
; GCN-LABEL: {{^}}frexp_exp_f16_zext
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
-; VI: v_and_b32_e32 v[[R_I32:[0-9]+]], 0xffff, v[[R_I16]]
-; GCN: buffer_store_dword v[[R_I32]]
+; GCN: buffer_store_dword v[[R_I16]]
define amdgpu_kernel void @frexp_exp_f16_zext(
i32 addrspace(1)* %r,
half addrspace(1)* %a) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 7229c9959f85..20d86f53547c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -350,7 +350,7 @@ define amdgpu_kernel void @maxnum_v2f16(
; VI-NEXT: v_max_f16_e64 v1, s5, s5
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -438,7 +438,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_a(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_max_f16_e32 v0, 0x4200, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -518,7 +518,7 @@ define amdgpu_kernel void @maxnum_v2f16_imm_b(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_max_f16_e32 v0, 4.0, v0
; VI-NEXT: v_max_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -619,7 +619,7 @@ define amdgpu_kernel void @maxnum_v3f16(
; VI-NEXT: v_max_f16_e64 v1, s6, s6
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e32 v1, v2, v1
@@ -749,7 +749,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v1, v0, v1
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_e64 v0, s6, s6
; VI-NEXT: s_lshr_b32 s4, s4, 16
@@ -758,7 +758,7 @@ define amdgpu_kernel void @maxnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v3, s4, s4
; VI-NEXT: v_max_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -867,12 +867,12 @@ define amdgpu_kernel void @fmax_v4f16_imm_a(
; VI-NEXT: v_max_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_max_f16_e32 v1, 0x4200, v1
; VI-NEXT: s_lshr_b32 s4, s4, 16
-; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v1, v1, v0
; VI-NEXT: v_max_f16_e32 v0, 0x4800, v2
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
; VI-NEXT: v_max_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 727ac7020e47..02130936cd82 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -378,7 +378,7 @@ define amdgpu_kernel void @minnum_v2f16_ieee(
; VI-NEXT: v_max_f16_e64 v1, s5, s5
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -501,7 +501,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_a(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_min_f16_e32 v0, 0x4200, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -581,7 +581,7 @@ define amdgpu_kernel void @minnum_v2f16_imm_b(
; VI-NEXT: v_max_f16_e64 v1, s4, s4
; VI-NEXT: v_min_f16_e32 v0, 4.0, v0
; VI-NEXT: v_min_f16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -682,7 +682,7 @@ define amdgpu_kernel void @minnum_v3f16(
; VI-NEXT: v_max_f16_e64 v1, s6, s6
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_min_f16_e32 v1, v2, v1
@@ -812,7 +812,7 @@ define amdgpu_kernel void @minnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v1, s7, s7
; VI-NEXT: v_min_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v1, v0, v1
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_max_f16_e64 v0, s6, s6
; VI-NEXT: s_lshr_b32 s4, s4, 16
@@ -821,7 +821,7 @@ define amdgpu_kernel void @minnum_v4f16(
; VI-NEXT: v_max_f16_e64 v2, s5, s5
; VI-NEXT: v_max_f16_e64 v3, s4, s4
; VI-NEXT: v_min_f16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
@@ -930,12 +930,12 @@ define amdgpu_kernel void @fmin_v4f16_imm_a(
; VI-NEXT: v_min_f16_sdwa v0, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT: v_min_f16_e32 v1, 0x4200, v1
; VI-NEXT: s_lshr_b32 s4, s4, 16
-; VI-NEXT: v_or_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v1, v1, v0
; VI-NEXT: v_min_f16_e32 v0, 0x4800, v2
; VI-NEXT: v_max_f16_e64 v2, s4, s4
; VI-NEXT: v_mov_b32_e32 v3, 0x4000
; VI-NEXT: v_min_f16_sdwa v2, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: v_or_b32_e32 v0, v0, v2
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index ed2202c51028..789aa514695e 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -248,7 +248,7 @@ define i32 @zext_div_fixup_f16(half %x, half %y, half %z) {
; GFX8-NEXT: s_setpc_b64
; GFX9: v_cvt_f16_f32_e32 v0, v0
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX9-NEXT: s_setpc_b64
; GFX10: v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
@@ -260,9 +260,13 @@ define i32 @zext_fptrunc_f16(float %x) {
}
; GCN-LABEL: {{^}}zext_fptrunc_fma_f16:
+; GFX8: v_fma_f32 v0, v0, v1, v2
+; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX8-NEXT: s_setpc_b64
+
; GFX900: v_fma_f32 v0, v0, v1, v2
; GFX900-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX900-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX900-NEXT: s_setpc_b64
; GFX906: v_fma_mixlo_f16 v0, v0, v1, v2
; GFX906-NEXT: v_and_b32_e32 v0, 0xffff, v0
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index d504b1901303..ec3f62238f3f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -67,7 +67,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_strict:
@@ -92,7 +92,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_ignore:
@@ -117,7 +117,7 @@ define <2 x half> @v_constained_fadd_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v2f16_fpexcept_maytrap:
@@ -143,7 +143,7 @@ define <3 x half> @v_constained_fadd_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -164,12 +164,9 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_add_f16_e32 v1, v1, v3
; GFX9-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_add_f16_e32 v0, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT: v_add_f16_e32 v1, v1, v3
-; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -181,8 +178,8 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_add_f16_e32 v1, v1, v3
; GFX8-NEXT: v_add_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fadd_v4f16_fpexcept_strict:
@@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_add_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_f16_e32 v1, s2, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fadd_v2f16_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
index 110e65144e0d..0aa92534c437 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
@@ -37,7 +37,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict(<2 x half> %x, <2 x ha
; GFX8-NEXT: v_fma_f16 v3, v5, v4, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict:
@@ -67,7 +67,7 @@ define <3 x half> @v_constained_fma_v3f16_fpexcept_strict(<3 x half> %x, <3 x ha
; GFX8-NEXT: v_fma_f16 v6, v8, v7, v6
; GFX8-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v6
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -117,10 +117,10 @@ define <4 x half> @v_constained_fma_v4f16_fpexcept_strict(<4 x half> %x, <4 x ha
; GFX8-NEXT: v_fma_f16 v7, v9, v8, v7
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v7
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v6
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v4f16_fpexcept_strict:
@@ -221,7 +221,7 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %
; GFX8-NEXT: v_fma_f16 v3, -v5, -v4, v3
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX8-NEXT: v_fma_f16 v0, -v0, -v1, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fma_v2f16_fpexcept_strict_fneg_fneg:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index de3a40b50a41..4019e39df83f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -67,7 +67,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
@@ -92,7 +92,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
@@ -117,7 +117,7 @@ define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
@@ -143,7 +143,7 @@ define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mul_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -164,12 +164,9 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX9-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_mul_f16_e32 v0, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT: v_mul_f16_e32 v1, v1, v3
-; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -181,8 +178,8 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_mul_f16_e32 v1, v1, v3
; GFX8-NEXT: v_mul_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_mul_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
@@ -234,7 +231,7 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_mul_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_mul_f16_e32 v1, s2, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index ddbf4f3727e4..73e2b5540865 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -61,7 +61,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -70,7 +69,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_strict(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_strict:
@@ -92,7 +91,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -101,7 +99,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_ignore(<2 x half> %x, <2 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_ignore:
@@ -123,7 +121,6 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -132,7 +129,7 @@ define <2 x half> @v_constained_fsub_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v2f16_fpexcept_maytrap:
@@ -154,7 +151,6 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
-; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -164,7 +160,7 @@ define <3 x half> @v_constained_fsub_v3f16_fpexcept_strict(<3 x half> %x, <3 x h
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -188,12 +184,9 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX9-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-NEXT: v_sub_f16_e32 v0, v0, v2
-; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT: v_sub_f16_e32 v1, v1, v3
-; GFX9-NEXT: v_and_b32_e32 v0, v2, v0
-; GFX9-NEXT: v_and_b32_e32 v1, v2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v5, 16, v0
; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -205,8 +198,8 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h
; GFX8-NEXT: v_sub_f16_e32 v1, v1, v3
; GFX8-NEXT: v_sub_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_f16_e32 v0, v0, v2
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v5
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constained_fsub_v4f16_fpexcept_strict:
@@ -245,13 +238,12 @@ define amdgpu_ps half @s_constained_fsub_f16_fpexcept_strict(half inreg %x, half
define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
; GFX9-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
; GFX9: ; %bb.0:
-; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_lshr_b32 s0, s3, 16
-; GFX9-NEXT: v_sub_f16_e32 v1, s2, v1
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: s_lshr_b32 s1, s2, 16
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_sub_f16_e32 v0, s1, v0
-; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT: v_sub_f16_e32 v1, s2, v1
; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT: ; return to shader part epilog
;
@@ -264,7 +256,7 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
; GFX8-NEXT: v_sub_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_sub_f16_e32 v1, s2, v1
-; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_constained_fsub_v2f16_fpexcept_strict:
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 5082772fae98..3a50f89dbaba 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -104,7 +104,7 @@ define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v0, v0, v1 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i16:
@@ -145,7 +145,7 @@ define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX8-NEXT: v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_add_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i16:
@@ -192,8 +192,8 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8-NEXT: v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_add_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_add_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i16:
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 49daf7108d73..c1062c82ba5a 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -126,7 +126,7 @@ define <2 x i16> @v_usubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v1 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i16:
@@ -173,7 +173,7 @@ define <3 x i16> @v_usubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX8-NEXT: v_sub_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_sub_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v3i16:
@@ -228,8 +228,8 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX8-NEXT: v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT: v_sub_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_sub_u16_e64 v1, v1, v3 clamp
-; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
+; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v4i16:
More information about the llvm-commits
mailing list