[llvm] [AMDGPU] Remove wavefrontsize feature from GFX10+ (PR #98400)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 01:30:57 PDT 2024
https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/98400
From ac6a4836fd133969cc61a8e2af92cce6dfbb5c1b Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Wed, 10 Jul 2024 14:44:15 -0700
Subject: [PATCH 1/4] [AMDGPU] Remove wavefrontsize feature from GFX10+
A processor definition should not include a default feature that
may be switched off by a different wave size. This removes the need
to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests;
-mattr=+wavefrontsize64 alone is enough.
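
For reference, a minimal standalone sketch of the defaulting rule the
hunks below apply in the subtarget, assembler, and disassembler: if
neither wavefrontsize feature is set explicitly, GFX10+ defaults to
wave32 and earlier generations to wave64. This is plain C++, not the
LLVM API; pickDefaultWaveSize and the WaveSize enum are illustrative
names only.

// Sketch of the wave-size defaulting rule (assumed names, not LLVM API).
#include <iostream>

enum class WaveSize { Unset, Wave32, Wave64 };

// 'IsGFX10Plus' and 'Explicit' stand in for the real subtarget queries.
static WaveSize pickDefaultWaveSize(bool IsGFX10Plus, WaveSize Explicit) {
  if (Explicit != WaveSize::Unset)
    return Explicit;                      // honor an explicit -mattr choice
  return IsGFX10Plus ? WaveSize::Wave32   // GFX10+ defaults to wave32
                     : WaveSize::Wave64;  // pre-GFX10 is always wave64
}

int main() {
  // gfx1100 with no wavefrontsize attribute now picks wave32 by default.
  std::cout << (pickDefaultWaveSize(true, WaveSize::Unset) == WaveSize::Wave32)
            << '\n';
  // An explicit +wavefrontsize64 still wins on GFX10+.
  std::cout << (pickDefaultWaveSize(true, WaveSize::Wave64) == WaveSize::Wave64)
            << '\n';
}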
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 3 -
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 8 +
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 11 +-
.../Disassembler/AMDGPUDisassembler.cpp | 20 +-
.../AMDGPU/check-subtarget-features.ll | 2 -
.../AMDGPU/llvm.amdgcn.wavefrontsize.ll | 16 +-
llvm/test/CodeGen/AMDGPU/unknown-processor.ll | 2 +-
.../MC/AMDGPU/gfx11_asm_vopc_t16_promote.s | 654 +++++++++---------
llvm/test/MC/AMDGPU/wave32.s | 8 +-
.../MC/Disassembler/AMDGPU/gfx10-wave32.txt | 4 +-
10 files changed, 378 insertions(+), 350 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 94e8e77b3c052..dfc8eaea66f7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1464,7 +1464,6 @@ def FeatureISAVersion10_Common : FeatureSet<
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureNSAEncoding,
- FeatureWavefrontSize32,
FeatureBackOffBarrier]>;
def FeatureISAVersion10_1_Common : FeatureSet<
@@ -1548,7 +1547,6 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureDot10Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesRegister,
FeatureArchitectedFlatScratch,
FeatureAtomicFaddRtnInsts,
@@ -1625,7 +1623,6 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDot11Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
FeatureArchitectedSGPRs,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 21fe1bc31a27e..a59893d3cf85d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -105,6 +105,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
: AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+ !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b08957d22ee74..1c3925cfad464 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1408,9 +1408,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
copySTI().ToggleFeature("southern-islands");
}
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ FeatureBitset FB = getFeatureBits();
+ if (!FB[AMDGPU::FeatureWavefrontSize64] &&
+ !FB[AMDGPU::FeatureWavefrontSize32]) {
+ if (ISA.Major >= 10)
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 695b2f246a778..57d717dd9e634 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,10 +45,26 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+ !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
+ if (AMDGPU::isGFX10Plus(STI))
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ return STICopy;
+ }
+
+ return STI;
+}
+
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
MCContext &Ctx, MCInstrInfo const *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+ : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
+ MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
+ TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
diff --git a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
index c246939811046..95ae8a6adfdf8 100644
--- a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
@@ -1,5 +1,3 @@
-; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125..824d3708c027d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
@@ -10,10 +10,10 @@
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
index 683ba98e52cf1..9cfba8b2e5c04 100644
--- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
+++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
; RUN: llc -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
target datalayout = "A5"
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index b16caed8b275f..75f20b0c7f0c4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -12,13 +12,13 @@ v_cmp_class_f16 vcc, vcc_hi, v255
v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v127, v255
+v_cmp_class_f16 vcc, v127, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_hi, v255
+v_cmp_class_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_lo, v255
+v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v1, v255
@@ -33,16 +33,16 @@ v_cmp_eq_f16 vcc, vcc_hi, v255
v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v1, v255
+v_cmp_eq_f16 vcc, v1, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v127, v255
+v_cmp_eq_f16 vcc, v127, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_hi, v255
+v_cmp_eq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_lo, v255
+v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v1, v255
@@ -57,16 +57,16 @@ v_cmp_eq_i16 vcc, vcc_hi, v255
v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v1, v255
+v_cmp_eq_i16 vcc, v1, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v127, v255
+v_cmp_eq_i16 vcc, v127, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_hi, v255
+v_cmp_eq_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_lo, v255
+v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v1, v255
@@ -81,16 +81,16 @@ v_cmp_eq_u16 vcc, vcc_hi, v255
v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v1, v255
+v_cmp_eq_u16 vcc, v1, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v127, v255
+v_cmp_eq_u16 vcc, v127, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_hi, v255
+v_cmp_eq_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_lo, v255
+v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v1, v255
@@ -105,16 +105,16 @@ v_cmp_f_f16 vcc, vcc_hi, v255
v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v1, v255
+v_cmp_f_f16 vcc, v1, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v127, v255
+v_cmp_f_f16 vcc, v127, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_hi, v255
+v_cmp_f_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_lo, v255
+v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v1, v255
@@ -129,16 +129,16 @@ v_cmp_ge_f16 vcc, vcc_hi, v255
v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v1, v255
+v_cmp_ge_f16 vcc, v1, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v127, v255
+v_cmp_ge_f16 vcc, v127, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_hi, v255
+v_cmp_ge_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_lo, v255
+v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v1, v255
@@ -153,16 +153,16 @@ v_cmp_ge_i16 vcc, vcc_hi, v255
v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v1, v255
+v_cmp_ge_i16 vcc, v1, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v127, v255
+v_cmp_ge_i16 vcc, v127, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_hi, v255
+v_cmp_ge_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_lo, v255
+v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v1, v255
@@ -177,16 +177,16 @@ v_cmp_ge_u16 vcc, vcc_hi, v255
v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v1, v255
+v_cmp_ge_u16 vcc, v1, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v127, v255
+v_cmp_ge_u16 vcc, v127, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_hi, v255
+v_cmp_ge_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_lo, v255
+v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v1, v255
@@ -201,16 +201,16 @@ v_cmp_gt_f16 vcc, vcc_hi, v255
v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v1, v255
+v_cmp_gt_f16 vcc, v1, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v127, v255
+v_cmp_gt_f16 vcc, v127, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_hi, v255
+v_cmp_gt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_lo, v255
+v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v1, v255
@@ -225,16 +225,16 @@ v_cmp_gt_i16 vcc, vcc_hi, v255
v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v1, v255
+v_cmp_gt_i16 vcc, v1, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v127, v255
+v_cmp_gt_i16 vcc, v127, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_hi, v255
+v_cmp_gt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_lo, v255
+v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v1, v255
@@ -249,16 +249,16 @@ v_cmp_gt_u16 vcc, vcc_hi, v255
v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v1, v255
+v_cmp_gt_u16 vcc, v1, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v127, v255
+v_cmp_gt_u16 vcc, v127, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_hi, v255
+v_cmp_gt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_lo, v255
+v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v1, v255
@@ -273,16 +273,16 @@ v_cmp_le_f16 vcc, vcc_hi, v255
v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v1, v255
+v_cmp_le_f16 vcc, v1, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v127, v255
+v_cmp_le_f16 vcc, v127, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_hi, v255
+v_cmp_le_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_lo, v255
+v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v1, v255
@@ -297,16 +297,16 @@ v_cmp_le_i16 vcc, vcc_hi, v255
v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v1, v255
+v_cmp_le_i16 vcc, v1, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v127, v255
+v_cmp_le_i16 vcc, v127, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_hi, v255
+v_cmp_le_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_lo, v255
+v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v1, v255
@@ -321,16 +321,16 @@ v_cmp_le_u16 vcc, vcc_hi, v255
v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v1, v255
+v_cmp_le_u16 vcc, v1, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v127, v255
+v_cmp_le_u16 vcc, v127, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_hi, v255
+v_cmp_le_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_lo, v255
+v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v1, v255
@@ -345,16 +345,16 @@ v_cmp_lg_f16 vcc, vcc_hi, v255
v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v1, v255
+v_cmp_lg_f16 vcc, v1, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v127, v255
+v_cmp_lg_f16 vcc, v127, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_hi, v255
+v_cmp_lg_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_lo, v255
+v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v1, v255
@@ -369,16 +369,16 @@ v_cmp_lt_f16 vcc, vcc_hi, v255
v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v1, v255
+v_cmp_lt_f16 vcc, v1, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v127, v255
+v_cmp_lt_f16 vcc, v127, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_hi, v255
+v_cmp_lt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_lo, v255
+v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v1, v255
@@ -393,16 +393,16 @@ v_cmp_lt_i16 vcc, vcc_hi, v255
v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v1, v255
+v_cmp_lt_i16 vcc, v1, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v127, v255
+v_cmp_lt_i16 vcc, v127, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_hi, v255
+v_cmp_lt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_lo, v255
+v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v1, v255
@@ -417,16 +417,16 @@ v_cmp_lt_u16 vcc, vcc_hi, v255
v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v1, v255
+v_cmp_lt_u16 vcc, v1, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v127, v255
+v_cmp_lt_u16 vcc, v127, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_hi, v255
+v_cmp_lt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_lo, v255
+v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v1, v255
@@ -441,16 +441,16 @@ v_cmp_ne_i16 vcc, vcc_hi, v255
v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v1, v255
+v_cmp_ne_i16 vcc, v1, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v127, v255
+v_cmp_ne_i16 vcc, v127, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_hi, v255
+v_cmp_ne_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_lo, v255
+v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v1, v255
@@ -465,16 +465,16 @@ v_cmp_ne_u16 vcc, vcc_hi, v255
v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v1, v255
+v_cmp_ne_u16 vcc, v1, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v127, v255
+v_cmp_ne_u16 vcc, v127, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_hi, v255
+v_cmp_ne_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_lo, v255
+v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v1, v255
@@ -489,16 +489,16 @@ v_cmp_neq_f16 vcc, vcc_hi, v255
v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v1, v255
+v_cmp_neq_f16 vcc, v1, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v127, v255
+v_cmp_neq_f16 vcc, v127, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_hi, v255
+v_cmp_neq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_lo, v255
+v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v1, v255
@@ -513,16 +513,16 @@ v_cmp_nge_f16 vcc, vcc_hi, v255
v_cmp_nge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v1, v255
+v_cmp_nge_f16 vcc, v1, v255
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v127, v255
+v_cmp_nge_f16 vcc, v127, v255
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, vcc_hi, v255
+v_cmp_nge_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, vcc_lo, v255
+v_cmp_nge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v1, v255
@@ -537,16 +537,16 @@ v_cmp_ngt_f16 vcc, vcc_hi, v255
v_cmp_ngt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v1, v255
+v_cmp_ngt_f16 vcc, v1, v255
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v127, v255
+v_cmp_ngt_f16 vcc, v127, v255
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, vcc_hi, v255
+v_cmp_ngt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, vcc_lo, v255
+v_cmp_ngt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v1, v255
@@ -561,16 +561,16 @@ v_cmp_nle_f16 vcc, vcc_hi, v255
v_cmp_nle_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v1, v255
+v_cmp_nle_f16 vcc, v1, v255
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v127, v255
+v_cmp_nle_f16 vcc, v127, v255
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, vcc_hi, v255
+v_cmp_nle_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, vcc_lo, v255
+v_cmp_nle_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v1, v255
@@ -585,16 +585,16 @@ v_cmp_nlg_f16 vcc, vcc_hi, v255
v_cmp_nlg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v1, v255
+v_cmp_nlg_f16 vcc, v1, v255
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v127, v255
+v_cmp_nlg_f16 vcc, v127, v255
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, vcc_hi, v255
+v_cmp_nlg_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, vcc_lo, v255
+v_cmp_nlg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v1, v255
@@ -609,16 +609,16 @@ v_cmp_nlt_f16 vcc, vcc_hi, v255
v_cmp_nlt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v1, v255
+v_cmp_nlt_f16 vcc, v1, v255
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v127, v255
+v_cmp_nlt_f16 vcc, v127, v255
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, vcc_hi, v255
+v_cmp_nlt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, vcc_lo, v255
+v_cmp_nlt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v1, v255
@@ -633,16 +633,16 @@ v_cmp_o_f16 vcc, vcc_hi, v255
v_cmp_o_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v1, v255
+v_cmp_o_f16 vcc, v1, v255
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v127, v255
+v_cmp_o_f16 vcc, v127, v255
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, vcc_hi, v255
+v_cmp_o_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, vcc_lo, v255
+v_cmp_o_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v1, v255
@@ -657,16 +657,16 @@ v_cmp_t_f16 vcc, vcc_hi, v255
v_cmp_t_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v1, v255
+v_cmp_t_f16 vcc, v1, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v127, v255
+v_cmp_t_f16 vcc, v127, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, vcc_hi, v255
+v_cmp_t_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, vcc_lo, v255
+v_cmp_t_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v1, v255
@@ -681,16 +681,16 @@ v_cmp_tru_f16 vcc, vcc_hi, v255
v_cmp_tru_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v1, v255
+v_cmp_tru_f16 vcc, v1, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v127, v255
+v_cmp_tru_f16 vcc, v127, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, vcc_hi, v255
+v_cmp_tru_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, vcc_lo, v255
+v_cmp_tru_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v1, v255
@@ -705,196 +705,196 @@ v_cmp_u_f16 vcc, vcc_hi, v255
v_cmp_u_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v1, v255
+v_cmp_u_f16 vcc, v1, v255
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v127, v255
+v_cmp_u_f16 vcc, v127, v255
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, vcc_hi, v255
+v_cmp_u_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, vcc_lo, v255
+v_cmp_u_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_u_f16_e64
v_cmp_class_f16 vcc, v128, v2
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v128, v2
+v_cmp_class_f16 vcc, v128, v2
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v128, v2
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v128, v2
+v_cmp_eq_f16 vcc, v128, v2
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v128, v2
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v128, v2
+v_cmp_eq_i16 vcc, v128, v2
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v128, v2
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v128, v2
+v_cmp_eq_u16 vcc, v128, v2
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v128, v2
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v128, v2
+v_cmp_f_f16 vcc, v128, v2
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v128, v2
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v128, v2
+v_cmp_ge_f16 vcc, v128, v2
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v128, v2
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v128, v2
+v_cmp_ge_i16 vcc, v128, v2
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v128, v2
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v128, v2
+v_cmp_ge_u16 vcc, v128, v2
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v128, v2
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v128, v2
+v_cmp_gt_f16 vcc, v128, v2
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v128, v2
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v128, v2
+v_cmp_gt_i16 vcc, v128, v2
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v128, v2
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v128, v2
+v_cmp_gt_u16 vcc, v128, v2
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v128, v2
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v128, v2
+v_cmp_le_f16 vcc, v128, v2
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v128, v2
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v128, v2
+v_cmp_le_i16 vcc, v128, v2
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v128, v2
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v128, v2
+v_cmp_le_u16 vcc, v128, v2
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v128, v2
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v128, v2
+v_cmp_lg_f16 vcc, v128, v2
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v128, v2
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v128, v2
+v_cmp_lt_f16 vcc, v128, v2
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v128, v2
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v128, v2
+v_cmp_lt_i16 vcc, v128, v2
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v128, v2
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v128, v2
+v_cmp_lt_u16 vcc, v128, v2
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v128, v2
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v128, v2
+v_cmp_ne_i16 vcc, v128, v2
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v128, v2
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v128, v2
+v_cmp_ne_u16 vcc, v128, v2
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v128, v2
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v128, v2
+v_cmp_neq_f16 vcc, v128, v2
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v128, v2
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v128, v2
+v_cmp_nge_f16 vcc, v128, v2
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v128, v2
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v128, v2
+v_cmp_ngt_f16 vcc, v128, v2
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v128, v2
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v128, v2
+v_cmp_nle_f16 vcc, v128, v2
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v128, v2
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v128, v2
+v_cmp_nlg_f16 vcc, v128, v2
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v128, v2
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v128, v2
+v_cmp_nlt_f16 vcc, v128, v2
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v128, v2
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v128, v2
+v_cmp_o_f16 vcc, v128, v2
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v128, v2
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v128, v2
+v_cmp_t_f16 vcc, v128, v2
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v128, v2
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v128, v2
+v_cmp_tru_f16 vcc, v128, v2
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v128, v2
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v128, v2
+v_cmp_u_f16 vcc, v128, v2
// GFX11: v_cmp_u_f16_e64
v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -903,7 +903,7 @@ v_cmp_class_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_class_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -912,10 +912,10 @@ v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -924,10 +924,10 @@ v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -936,10 +936,10 @@ v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_eq_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -948,10 +948,10 @@ v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_f_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_f_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -960,10 +960,10 @@ v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -972,10 +972,10 @@ v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -984,10 +984,10 @@ v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ge_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -996,10 +996,10 @@ v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1008,10 +1008,10 @@ v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1020,10 +1020,10 @@ v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_gt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1032,10 +1032,10 @@ v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_le_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_le_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1044,10 +1044,10 @@ v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_le_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_le_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1056,10 +1056,10 @@ v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_le_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_le_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1068,10 +1068,10 @@ v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_lg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_lg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1080,10 +1080,10 @@ v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1092,10 +1092,10 @@ v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1104,10 +1104,10 @@ v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_lt_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1116,10 +1116,10 @@ v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ne_i16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ne_i16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1128,10 +1128,10 @@ v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ne_u16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ne_u16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1140,10 +1140,10 @@ v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_neq_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1152,10 +1152,10 @@ v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nge_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nge_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1164,10 +1164,10 @@ v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_ngt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_ngt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1176,10 +1176,10 @@ v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nle_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nle_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1188,10 +1188,10 @@ v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1200,10 +1200,10 @@ v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1212,10 +1212,10 @@ v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_o_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_o_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1224,10 +1224,10 @@ v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_t_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_t_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1236,10 +1236,10 @@ v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_tru_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_tru_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
@@ -1248,190 +1248,190 @@ v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v1, v255 quad_perm:[3,2,1,0]
+v_cmp_u_f16 vcc, v1, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v127, v255 quad_perm:[3,2,1,0]
+v_cmp_u_f16 vcc, v127, v255 quad_perm:[3,2,1,0]
// GFX11: v_cmp_u_f16_e64
v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_class_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_eq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_eq_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_eq_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_f_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ge_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ge_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_gt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_gt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_gt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_le_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_le_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_le_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lt_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_lt_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ne_i16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ne_u16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_neq_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_nge_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_ngt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_nle_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_nlg_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_nlt_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_o_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_t_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_tru_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v128, v2 quad_perm:[3,2,1,0]
+v_cmp_u_f16 vcc, v128, v2 quad_perm:[3,2,1,0]
// GFX11: v_cmp_u_f16_e64
v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1440,7 +1440,7 @@ v_cmp_class_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_class_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1449,10 +1449,10 @@ v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1461,10 +1461,10 @@ v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1473,10 +1473,10 @@ v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1485,10 +1485,10 @@ v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_f_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_f_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1497,10 +1497,10 @@ v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1509,10 +1509,10 @@ v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1521,10 +1521,10 @@ v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1533,10 +1533,10 @@ v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1545,10 +1545,10 @@ v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1557,10 +1557,10 @@ v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1569,10 +1569,10 @@ v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1581,10 +1581,10 @@ v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1593,10 +1593,10 @@ v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1605,10 +1605,10 @@ v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1617,10 +1617,10 @@ v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1629,10 +1629,10 @@ v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1641,10 +1641,10 @@ v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1653,10 +1653,10 @@ v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_i16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_i16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1665,10 +1665,10 @@ v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_u16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_u16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1677,10 +1677,10 @@ v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1689,10 +1689,10 @@ v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nge_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nge_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1701,10 +1701,10 @@ v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ngt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ngt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1713,10 +1713,10 @@ v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nle_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nle_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1725,10 +1725,10 @@ v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlg_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlg_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1737,10 +1737,10 @@ v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1749,10 +1749,10 @@ v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1761,10 +1761,10 @@ v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1773,10 +1773,10 @@ v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_tru_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_tru_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
@@ -1785,189 +1785,189 @@ v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_u_f16 vcc, v1, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_u_f16 vcc, v127, v255 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_u_f16_e64
v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_class_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_eq_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_f_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ge_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_gt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_le_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_lt_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_i16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ne_u16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_neq_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
-v_cmp_nge_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nge_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nge_f16_e64
v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
-v_cmp_ngt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_ngt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_ngt_f16_e64
v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
-v_cmp_nle_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nle_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nle_f16_e64
v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
-v_cmp_nlg_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlg_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlg_f16_e64
v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
-v_cmp_nlt_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_nlt_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_nlt_f16_e64
v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_o_f16_e64
-v_cmp_o_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_o_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_o_f16_e64
v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_t_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_t_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
-v_cmp_tru_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_tru_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_t_f16_e64
v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_u_f16_e64
-v_cmp_u_f16 vcc_lo, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
+v_cmp_u_f16 vcc, v128, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cmp_u_f16_e64
diff --git a/llvm/test/MC/AMDGPU/wave32.s b/llvm/test/MC/AMDGPU/wave32.s
index c52693076e2c5..25bb4fd84433b 100644
--- a/llvm/test/MC/AMDGPU/wave32.s
+++ b/llvm/test/MC/AMDGPU/wave32.s
@@ -1,7 +1,7 @@
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR --implicit-check-not=error: %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -show-encoding %s | FileCheck -check-prefix=GFX1032 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -show-encoding %s | FileCheck -check-prefix=GFX1064 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 %s 2>&1 | FileCheck -check-prefix=GFX1032-ERR --implicit-check-not=error: %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 %s 2>&1 | FileCheck -check-prefix=GFX1064-ERR --implicit-check-not=error: %s
v_cmp_ge_i32_e32 s0, v0
// GFX1032: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt
index 78ca1bbdacf29..31fc10174bb0b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt
@@ -1,5 +1,5 @@
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
-# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64,-wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1032 %s
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -check-prefix=GFX1064 %s
# GFX1032: v_cmp_lt_f32_e32 vcc_lo, s2, v4
# GFX1064: v_cmp_lt_f32_e32 vcc, s2, v4
>From 55dda8d68c366fa876a549a8e3b7778aaa787161 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 11 Jul 2024 00:57:39 -0700
Subject: [PATCH 2/4] Just add FeatureWavefrontSize32 unconditionally
Older targets already have FeatureWavefrontSize64 in their definitions.
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 11 +++++------
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 11 +++++------
.../Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp | 8 ++++----
3 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index a59893d3cf85d..929d75f2d0757 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -106,12 +106,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
}
if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
- !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
- if (getGeneration() >= AMDGPUSubtarget::GFX10)
- ToggleFeature(AMDGPU::FeatureWavefrontSize32);
- else
- ToggleFeature(AMDGPU::FeatureWavefrontSize64);
- }
+ !hasFeature(AMDGPU::FeatureWavefrontSize64))
+ // If no wave size feature is set the target must be gfx10 or later;
+ // earlier generations already include FeatureWavefrontSize64 in their
+ // definitions. Default gfx10+ to wave32.
+ ToggleFeature(AMDGPU::FeatureWavefrontSize32);
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
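As a minimal standalone sketch of the defaulting rule added above (toy stand-ins only: FeatureSet, the Feature enum and applyDefaultWaveSize below are illustrative names, not LLVM's real GCNSubtarget/FeatureBitset API):

// Toy illustration of the wave-size defaulting rule from this patch.
// Pre-gfx10 processor definitions already carry WavefrontSize64, so if
// neither bit is set the target must be gfx10+ and defaults to wave32.
#include <bitset>
#include <cassert>
#include <iostream>

enum Feature { WavefrontSize32, WavefrontSize64, NumFeatures };
using FeatureSet = std::bitset<NumFeatures>;

void applyDefaultWaveSize(FeatureSet &FS) {
  if (!FS[WavefrontSize32] && !FS[WavefrontSize64])
    FS.set(WavefrontSize32);
}

int main() {
  FeatureSet Gfx10;                  // gfx10+ definition: no wave size feature
  applyDefaultWaveSize(Gfx10);
  assert(Gfx10[WavefrontSize32] && !Gfx10[WavefrontSize64]);

  FeatureSet Gfx10Wave64;            // user passed -mattr=+wavefrontsize64
  Gfx10Wave64.set(WavefrontSize64);
  applyDefaultWaveSize(Gfx10Wave64); // unchanged: the explicit choice wins
  assert(!Gfx10Wave64[WavefrontSize32] && Gfx10Wave64[WavefrontSize64]);

  std::cout << "defaults applied as expected\n";
}

As the RUN-line changes to wave32.s above show, the practical effect is that an explicit +wavefrontsize64 no longer has to be paired with -wavefrontsize32.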
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 1c3925cfad464..0a2fd21bd937b 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1411,12 +1411,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
FeatureBitset FB = getFeatureBits();
if (!FB[AMDGPU::FeatureWavefrontSize64] &&
- !FB[AMDGPU::FeatureWavefrontSize32]) {
- if (ISA.Major >= 10)
- copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
- else
- copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize64);
- }
+ !FB[AMDGPU::FeatureWavefrontSize32])
+ // If no wave size feature is set the target must be gfx10 or later;
+ // earlier generations already include FeatureWavefrontSize64 in their
+ // definitions. Default gfx10+ to wave32.
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 57d717dd9e634..3f2c3d4d2be8d 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -50,10 +50,10 @@ static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
!STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
- if (AMDGPU::isGFX10Plus(STI))
- STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
- else
- STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ // If no wave size feature is set the target must be gfx10 or later;
+ // earlier generations already include FeatureWavefrontSize64 in their
+ // definitions. Default gfx10+ to wave32.
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
return STICopy;
}
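A rough sketch of the disassembler-side variant of the same rule, which applies the default to a copy rather than mutating the shared subtarget info (toy types again; the real code obtains the copy through Ctx.getSubtargetCopy as in the hunk above, here modeled with an explicit Storage parameter):

// Toy illustration of the copy-then-toggle pattern used in
// addDefaultWaveSize(): the incoming subtarget info is shared, so the
// wave32 default is applied to a copy, never to the original.
#include <bitset>
#include <cassert>

enum Feature { WavefrontSize32, WavefrontSize64, NumFeatures };

struct SubtargetInfo {                 // stand-in for MCSubtargetInfo
  std::bitset<NumFeatures> Bits;
  bool hasFeature(Feature F) const { return Bits[F]; }
  void toggleFeature(Feature F) { Bits.flip(F); }
};

// Returns the original when a wave size is already set, otherwise a copy
// with wave32 toggled on (gfx10+ is the only case that reaches this point).
const SubtargetInfo &addDefaultWaveSize(const SubtargetInfo &STI,
                                        SubtargetInfo &Storage) {
  if (!STI.hasFeature(WavefrontSize64) && !STI.hasFeature(WavefrontSize32)) {
    Storage = STI;                     // copy, do not touch the shared STI
    Storage.toggleFeature(WavefrontSize32);
    return Storage;
  }
  return STI;
}

int main() {
  SubtargetInfo Shared;                // gfx10+: neither wave bit set
  SubtargetInfo Scratch;
  const SubtargetInfo &Effective = addDefaultWaveSize(Shared, Scratch);
  assert(Effective.hasFeature(WavefrontSize32));
  assert(!Shared.hasFeature(WavefrontSize32)); // original left untouched
}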
>From d09e31ca68dddc3a0f34a22821e1232bec2aa971 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 11 Jul 2024 01:17:27 -0700
Subject: [PATCH 3/4] Move getIsaVersion to its original place
---
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 0a2fd21bd937b..462ff5846c4bd 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1408,7 +1408,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
copySTI().ToggleFeature("southern-islands");
}
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
FeatureBitset FB = getFeatureBits();
if (!FB[AMDGPU::FeatureWavefrontSize64] &&
!FB[AMDGPU::FeatureWavefrontSize32])
@@ -1419,6 +1418,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
>From f5ebe7c4861a3036b0df36fc9f663926b8a0c8da Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Thu, 11 Jul 2024 01:30:35 -0700
Subject: [PATCH 4/4] Re-added braces.
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 3 ++-
llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 929d75f2d0757..975cda1587bd5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -106,11 +106,12 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
}
if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
- !hasFeature(AMDGPU::FeatureWavefrontSize64))
+ !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
// If no wave size feature is set the target must be gfx10 or later;
// earlier generations already include FeatureWavefrontSize64 in their
// definitions. Default gfx10+ to wave32.
ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ }
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 462ff5846c4bd..9c6df2120ebc7 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1410,11 +1410,12 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
FeatureBitset FB = getFeatureBits();
if (!FB[AMDGPU::FeatureWavefrontSize64] &&
- !FB[AMDGPU::FeatureWavefrontSize32])
+ !FB[AMDGPU::FeatureWavefrontSize32]) {
// If no wave size feature is set the target must be gfx10 or later;
// earlier generations already include FeatureWavefrontSize64 in their
// definitions. Default gfx10+ to wave32.
copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ }
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));