[llvm] [AMDGPU] Remove wavefrontsize feature from GFX10+ (PR #98400)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 10 14:58:40 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mc
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
Processor definition shall not include a default feature which may be switched off by a different wave size. This allows not to write -mattr=-wavefrontsize32,+wavefrontsize64 in tests.
---
Patch is 71.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/98400.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+10-1)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+18-2)
- (modified) llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll (-2)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll (+8-8)
- (modified) llvm/test/CodeGen/AMDGPU/unknown-processor.ll (+1-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s (+327-327)
- (modified) llvm/test/MC/AMDGPU/wave32.s (+4-4)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx10-wave32.txt (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 94e8e77b3c052..dfc8eaea66f7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1464,7 +1464,6 @@ def FeatureISAVersion10_Common : FeatureSet<
FeatureLDSBankCount32,
FeatureDLInsts,
FeatureNSAEncoding,
- FeatureWavefrontSize32,
FeatureBackOffBarrier]>;
def FeatureISAVersion10_1_Common : FeatureSet<
@@ -1548,7 +1547,6 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureDot10Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesRegister,
FeatureArchitectedFlatScratch,
FeatureAtomicFaddRtnInsts,
@@ -1625,7 +1623,6 @@ def FeatureISAVersion12 : FeatureSet<
FeatureDot11Insts,
FeatureNSAEncoding,
FeaturePartialNSAEncoding,
- FeatureWavefrontSize32,
FeatureShaderCyclesHiLoRegisters,
FeatureArchitectedFlatScratch,
FeatureArchitectedSGPRs,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 21fe1bc31a27e..a59893d3cf85d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -105,6 +105,14 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
: AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
+ !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
// We don't support FP64 for EG/NI atm.
assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS));
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b08957d22ee74..1c3925cfad464 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1408,9 +1408,18 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
copySTI().ToggleFeature("southern-islands");
}
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ FeatureBitset FB = getFeatureBits();
+ if (!FB[AMDGPU::FeatureWavefrontSize64] &&
+ !FB[AMDGPU::FeatureWavefrontSize32]) {
+ if (ISA.Major >= 10)
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ copySTI().ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ }
+
setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 695b2f246a778..57d717dd9e634 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,10 +45,26 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
+static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ if (!STI.hasFeature(AMDGPU::FeatureWavefrontSize64) &&
+ !STI.hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ MCSubtargetInfo &STICopy = Ctx.getSubtargetCopy(STI);
+ if (AMDGPU::isGFX10Plus(STI))
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+ else
+ STICopy.ToggleFeature(AMDGPU::FeatureWavefrontSize64);
+ return STICopy;
+ }
+
+ return STI;
+}
+
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
MCContext &Ctx, MCInstrInfo const *MCII)
- : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
+ : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
+ MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
+ TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
diff --git a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
index c246939811046..95ae8a6adfdf8 100644
--- a/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/check-subtarget-features.ll
@@ -1,5 +1,3 @@
-; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
-; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,-wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
; RUN: not llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,+wavefrontsize64 < %s 2>&1 | FileCheck %s -check-prefix=ERR -implicit-check-not=error:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125..824d3708c027d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -1,8 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GCN,W32 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,W64 %s
; RUN: opt -O3 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -S < %s | FileCheck -check-prefix=OPT %s
@@ -10,10 +10,10 @@
; RUN: opt -mtriple=amdgcn-- -passes='default<O3>' -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -mtriple=amdgcn-- -mcpu=tonga -O3 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32,-wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
-; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=-wavefrontsize32,+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1010 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize32 -S < %s | FileCheck -check-prefix=OPT %s
+; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -O3 -mattr=+wavefrontsize64 -S < %s | FileCheck -check-prefix=OPT %s
; GCN-LABEL: {{^}}fold_wavefrontsize:
; OPT-LABEL: define amdgpu_kernel void @fold_wavefrontsize(
diff --git a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
index 683ba98e52cf1..9cfba8b2e5c04 100644
--- a/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
+++ b/llvm/test/CodeGen/AMDGPU/unknown-processor.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=GCN %s
; RUN: llc -mtriple=r600-- -mcpu=unknown -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR -check-prefix=R600 %s
target datalayout = "A5"
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
index b16caed8b275f..75f20b0c7f0c4 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vopc_t16_promote.s
@@ -12,13 +12,13 @@ v_cmp_class_f16 vcc, vcc_hi, v255
v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, v127, v255
+v_cmp_class_f16 vcc, v127, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_hi, v255
+v_cmp_class_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_class_f16_e64
-v_cmp_class_f16 vcc_lo, vcc_lo, v255
+v_cmp_class_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_class_f16_e64
v_cmp_eq_f16 vcc, v1, v255
@@ -33,16 +33,16 @@ v_cmp_eq_f16 vcc, vcc_hi, v255
v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v1, v255
+v_cmp_eq_f16 vcc, v1, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, v127, v255
+v_cmp_eq_f16 vcc, v127, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_hi, v255
+v_cmp_eq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_f16_e64
-v_cmp_eq_f16 vcc_lo, vcc_lo, v255
+v_cmp_eq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_f16_e64
v_cmp_eq_i16 vcc, v1, v255
@@ -57,16 +57,16 @@ v_cmp_eq_i16 vcc, vcc_hi, v255
v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v1, v255
+v_cmp_eq_i16 vcc, v1, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, v127, v255
+v_cmp_eq_i16 vcc, v127, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_hi, v255
+v_cmp_eq_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_i16_e64
-v_cmp_eq_i16 vcc_lo, vcc_lo, v255
+v_cmp_eq_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_i16_e64
v_cmp_eq_u16 vcc, v1, v255
@@ -81,16 +81,16 @@ v_cmp_eq_u16 vcc, vcc_hi, v255
v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v1, v255
+v_cmp_eq_u16 vcc, v1, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, v127, v255
+v_cmp_eq_u16 vcc, v127, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_hi, v255
+v_cmp_eq_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_eq_u16_e64
-v_cmp_eq_u16 vcc_lo, vcc_lo, v255
+v_cmp_eq_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_eq_u16_e64
v_cmp_f_f16 vcc, v1, v255
@@ -105,16 +105,16 @@ v_cmp_f_f16 vcc, vcc_hi, v255
v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v1, v255
+v_cmp_f_f16 vcc, v1, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, v127, v255
+v_cmp_f_f16 vcc, v127, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_hi, v255
+v_cmp_f_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_f_f16_e64
-v_cmp_f_f16 vcc_lo, vcc_lo, v255
+v_cmp_f_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_f_f16_e64
v_cmp_ge_f16 vcc, v1, v255
@@ -129,16 +129,16 @@ v_cmp_ge_f16 vcc, vcc_hi, v255
v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v1, v255
+v_cmp_ge_f16 vcc, v1, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, v127, v255
+v_cmp_ge_f16 vcc, v127, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_hi, v255
+v_cmp_ge_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_f16_e64
-v_cmp_ge_f16 vcc_lo, vcc_lo, v255
+v_cmp_ge_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_f16_e64
v_cmp_ge_i16 vcc, v1, v255
@@ -153,16 +153,16 @@ v_cmp_ge_i16 vcc, vcc_hi, v255
v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v1, v255
+v_cmp_ge_i16 vcc, v1, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, v127, v255
+v_cmp_ge_i16 vcc, v127, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_hi, v255
+v_cmp_ge_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_i16_e64
-v_cmp_ge_i16 vcc_lo, vcc_lo, v255
+v_cmp_ge_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_i16_e64
v_cmp_ge_u16 vcc, v1, v255
@@ -177,16 +177,16 @@ v_cmp_ge_u16 vcc, vcc_hi, v255
v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v1, v255
+v_cmp_ge_u16 vcc, v1, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, v127, v255
+v_cmp_ge_u16 vcc, v127, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_hi, v255
+v_cmp_ge_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ge_u16_e64
-v_cmp_ge_u16 vcc_lo, vcc_lo, v255
+v_cmp_ge_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ge_u16_e64
v_cmp_gt_f16 vcc, v1, v255
@@ -201,16 +201,16 @@ v_cmp_gt_f16 vcc, vcc_hi, v255
v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v1, v255
+v_cmp_gt_f16 vcc, v1, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, v127, v255
+v_cmp_gt_f16 vcc, v127, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_hi, v255
+v_cmp_gt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_f16_e64
-v_cmp_gt_f16 vcc_lo, vcc_lo, v255
+v_cmp_gt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_f16_e64
v_cmp_gt_i16 vcc, v1, v255
@@ -225,16 +225,16 @@ v_cmp_gt_i16 vcc, vcc_hi, v255
v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v1, v255
+v_cmp_gt_i16 vcc, v1, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, v127, v255
+v_cmp_gt_i16 vcc, v127, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_hi, v255
+v_cmp_gt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_i16_e64
-v_cmp_gt_i16 vcc_lo, vcc_lo, v255
+v_cmp_gt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_i16_e64
v_cmp_gt_u16 vcc, v1, v255
@@ -249,16 +249,16 @@ v_cmp_gt_u16 vcc, vcc_hi, v255
v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v1, v255
+v_cmp_gt_u16 vcc, v1, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, v127, v255
+v_cmp_gt_u16 vcc, v127, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_hi, v255
+v_cmp_gt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_gt_u16_e64
-v_cmp_gt_u16 vcc_lo, vcc_lo, v255
+v_cmp_gt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_gt_u16_e64
v_cmp_le_f16 vcc, v1, v255
@@ -273,16 +273,16 @@ v_cmp_le_f16 vcc, vcc_hi, v255
v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v1, v255
+v_cmp_le_f16 vcc, v1, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, v127, v255
+v_cmp_le_f16 vcc, v127, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_hi, v255
+v_cmp_le_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_f16_e64
-v_cmp_le_f16 vcc_lo, vcc_lo, v255
+v_cmp_le_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_f16_e64
v_cmp_le_i16 vcc, v1, v255
@@ -297,16 +297,16 @@ v_cmp_le_i16 vcc, vcc_hi, v255
v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v1, v255
+v_cmp_le_i16 vcc, v1, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, v127, v255
+v_cmp_le_i16 vcc, v127, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_hi, v255
+v_cmp_le_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_i16_e64
-v_cmp_le_i16 vcc_lo, vcc_lo, v255
+v_cmp_le_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_i16_e64
v_cmp_le_u16 vcc, v1, v255
@@ -321,16 +321,16 @@ v_cmp_le_u16 vcc, vcc_hi, v255
v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v1, v255
+v_cmp_le_u16 vcc, v1, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, v127, v255
+v_cmp_le_u16 vcc, v127, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_hi, v255
+v_cmp_le_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_le_u16_e64
-v_cmp_le_u16 vcc_lo, vcc_lo, v255
+v_cmp_le_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_le_u16_e64
v_cmp_lg_f16 vcc, v1, v255
@@ -345,16 +345,16 @@ v_cmp_lg_f16 vcc, vcc_hi, v255
v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v1, v255
+v_cmp_lg_f16 vcc, v1, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, v127, v255
+v_cmp_lg_f16 vcc, v127, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_hi, v255
+v_cmp_lg_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lg_f16_e64
-v_cmp_lg_f16 vcc_lo, vcc_lo, v255
+v_cmp_lg_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lg_f16_e64
v_cmp_lt_f16 vcc, v1, v255
@@ -369,16 +369,16 @@ v_cmp_lt_f16 vcc, vcc_hi, v255
v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v1, v255
+v_cmp_lt_f16 vcc, v1, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, v127, v255
+v_cmp_lt_f16 vcc, v127, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_hi, v255
+v_cmp_lt_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_f16_e64
-v_cmp_lt_f16 vcc_lo, vcc_lo, v255
+v_cmp_lt_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_f16_e64
v_cmp_lt_i16 vcc, v1, v255
@@ -393,16 +393,16 @@ v_cmp_lt_i16 vcc, vcc_hi, v255
v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v1, v255
+v_cmp_lt_i16 vcc, v1, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, v127, v255
+v_cmp_lt_i16 vcc, v127, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_hi, v255
+v_cmp_lt_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_i16_e64
-v_cmp_lt_i16 vcc_lo, vcc_lo, v255
+v_cmp_lt_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_i16_e64
v_cmp_lt_u16 vcc, v1, v255
@@ -417,16 +417,16 @@ v_cmp_lt_u16 vcc, vcc_hi, v255
v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v1, v255
+v_cmp_lt_u16 vcc, v1, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, v127, v255
+v_cmp_lt_u16 vcc, v127, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_hi, v255
+v_cmp_lt_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_lt_u16_e64
-v_cmp_lt_u16 vcc_lo, vcc_lo, v255
+v_cmp_lt_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_lt_u16_e64
v_cmp_ne_i16 vcc, v1, v255
@@ -441,16 +441,16 @@ v_cmp_ne_i16 vcc, vcc_hi, v255
v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v1, v255
+v_cmp_ne_i16 vcc, v1, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, v127, v255
+v_cmp_ne_i16 vcc, v127, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_hi, v255
+v_cmp_ne_i16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_i16_e64
-v_cmp_ne_i16 vcc_lo, vcc_lo, v255
+v_cmp_ne_i16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_i16_e64
v_cmp_ne_u16 vcc, v1, v255
@@ -465,16 +465,16 @@ v_cmp_ne_u16 vcc, vcc_hi, v255
v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v1, v255
+v_cmp_ne_u16 vcc, v1, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, v127, v255
+v_cmp_ne_u16 vcc, v127, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_hi, v255
+v_cmp_ne_u16 vcc, vcc_hi, v255
// GFX11: v_cmp_ne_u16_e64
-v_cmp_ne_u16 vcc_lo, vcc_lo, v255
+v_cmp_ne_u16 vcc, vcc_lo, v255
// GFX11: v_cmp_ne_u16_e64
v_cmp_neq_f16 vcc, v1, v255
@@ -489,16 +489,16 @@ v_cmp_neq_f16 vcc, vcc_hi, v255
v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v1, v255
+v_cmp_neq_f16 vcc, v1, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, v127, v255
+v_cmp_neq_f16 vcc, v127, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_hi, v255
+v_cmp_neq_f16 vcc, vcc_hi, v255
// GFX11: v_cmp_neq_f16_e64
-v_cmp_neq_f16 vcc_lo, vcc_lo, v255
+v_cmp_neq_f16 vcc, vcc_lo, v255
// GFX11: v_cmp_neq_f16_e64
v_cmp_nge_f16 vcc, v1, v255
@@ -513,16 +5...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/98400
More information about the llvm-commits
mailing list