[llvm] AMDGPU: Ensure both wavesize features are not set (PR #159234)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 02:16:49 PDT 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/159234

>From 619f48d2b6a4da75b7123c4dfc376ffd5b3b471d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 02:00:48 +0900
Subject: [PATCH 1/9] AMDGPU: Ensure both wavesize features are not set

Make sure we cannot be in a mode with both wave sizes enabled. This
prevents assertion failures in a future change. This should probably
just be an error, but we do not have a good way to report
errors from the MCSubtargetInfo constructor.

This breaks the assembler test which enables both wave size features,
but that behavior is not really useful. Maybe it's better to just
delete the test.
---
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp       | 16 +++++++++++--
 .../wavesize-feature-unsupported-target.s     | 23 +++++++++++++++++++
 .../AMDGPU/gfx1250_wave64_feature.s           | 13 +++++++++++
 .../AMDGPU/gfx9_wave32_feature.txt            | 13 +++++++++++
 4 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
 create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index f2e2d0ed3f8a6..0ea5ad7ccaea4 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -82,20 +82,32 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   MCSubtargetInfo *STI =
       createAMDGPUMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
 
+  bool IsWave64 = STI->hasFeature(AMDGPU::FeatureWavefrontSize64);
+  bool IsWave32 = STI->hasFeature(AMDGPU::FeatureWavefrontSize32);
+
   // FIXME: We should error for the default target.
   if (STI->getFeatureBits().none())
     STI->ToggleFeature(AMDGPU::FeatureSouthernIslands);
 
-  if (!STI->hasFeature(AMDGPU::FeatureWavefrontSize64) &&
-      !STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+  if (!IsWave64 && !IsWave32) {
     // If there is no default wave size it must be a generation before gfx10,
     // these have FeatureWavefrontSize64 in their definition already. For gfx10+
     // set wave32 as a default.
     STI->ToggleFeature(AMDGPU::isGFX10Plus(*STI)
                            ? AMDGPU::FeatureWavefrontSize32
                            : AMDGPU::FeatureWavefrontSize64);
+  } else if (IsWave64 && IsWave32) {
+    // The wave size is mutually exclusive. If both somehow end up set, wave64
+    // wins.
+    //
+    // FIXME: This should really just be an error.
+    STI->ToggleFeature(AMDGPU::FeatureWavefrontSize32);
   }
 
+  assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) ^
+          STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
+         "wavesize features are mutually exclusive");
+
   return STI;
 }
 
diff --git a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
new file mode 100644
index 0000000000000..8fc7b7fb05f0c
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
@@ -0,0 +1,23 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s
+
+// Both are supported, but not at the same time
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s
+
+// Test that there is no assertion when using an explicit
+// wavefrontsize attribute on a target which does not support it.
+
+// GFX1250: v_add_f64_e32 v[0:1], 1.0, v[0:1]
+// GFX900: v_add_f64 v[0:1], 1.0, v[0:1]
+// GFX10: v_add_f64 v[0:1], 1.0, v[0:1]
+v_add_f64 v[0:1], 1.0, v[0:1]
+
+// GFX1250: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+// GFX900: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+// GFX10: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+
+// GFX1250: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+// GFX900: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+// GFX10: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+v_cndmask_b32 v1, v2, v3, s[0:1]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
new file mode 100644
index 0000000000000..bdea636a9efe3
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_wave64_feature.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -disassemble -o - %s | FileCheck %s
+
+# Make sure there's no assertion when trying to use an unsupported
+# wave64 on a wave32-only target
+
+# CHECK: v_add_f64_e32 v[0:1], 1.0, v[0:1]
+0xf2,0x00,0x00,0x04
+
+# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+0x00,0x00,0x4a,0xd4,0xf2,0x02,0x00,0x00
+
+# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+0x01,0x00,0x01,0xd5,0x02,0x07,0x02,0x00
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
new file mode 100644
index 0000000000000..40494b3dfa1ea
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_wave32_feature.txt
@@ -0,0 +1,13 @@
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -disassemble -o - %s | FileCheck %s
+
+# Make sure there's no assertion when trying to use an unsupported
+# wave32 on a wave64-only target
+
+# CHECK: v_add_f64 v[0:1], 1.0, v[0:1]
+0x00,0x00,0x80,0xd2,0xf2,0x00,0x02,0x00
+
+# CHECK: v_cmp_eq_u32_e64 s[0:1], 1.0, s1
+0x00,0x00,0xca,0xd0,0xf2,0x02,0x00,0x00
+
+# CHECK: v_cndmask_b32_e64 v1, v2, v3, s[0:1]
+0x01,0x00,0x00,0xd1,0x02,0x07,0x02,0x00

>From 3b4d5a3bbd99073ecf6d31653e637567696c003f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 11:44:35 +0900
Subject: [PATCH 2/9] Convert wave_any test to update_mc_test_checks

---
 llvm/test/MC/AMDGPU/wave_any.s | 57 +++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s
index 27502eff89bfc..3c265db30a324 100644
--- a/llvm/test/MC/AMDGPU/wave_any.s
+++ b/llvm/test/MC/AMDGPU/wave_any.s
@@ -1,13 +1,14 @@
+// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
 
 v_cmp_ge_i32_e32 s0, v0
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0         ; encoding: [0x00,0x00,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc_lo, s0, v1
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1         ; encoding: [0x00,0x02,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc, s0, v2
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2         ; encoding: [0x00,0x04,0x0c,0x7d]
 
 v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
@@ -16,10 +17,10 @@ v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
 
 v_cmp_class_f32_e32 vcc_lo, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0      ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f32_e32 vcc, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0      ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
@@ -34,13 +35,13 @@ v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
 
 v_cndmask_b32_e32 v1, v2, v3,
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc_lo
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
 
 v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
 // GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
@@ -127,61 +128,61 @@ v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 ban
 // GFX10: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
 
 v_add_co_u32 v0, s0, v0, v2
-// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2  ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_sub_co_u32 v0, s0, v0, v2
-// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2  ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_subrev_co_u32 v0, s0, v0, v2
-// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s0, v0, v2          ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s0, v0, v2          ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
 // GFX10: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32 v0, exec, v0, v2
-// GFX10: v_add_co_u32 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, exec, v0, v2           ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32 v0, exec_lo, v0, v2
-// GFX10: v_add_co_u32 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, exec_lo, v0, v2        ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_sub_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_subrev_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2      ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2      ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
@@ -199,10 +200,10 @@ v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
 // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
 
 v_div_scale_f32 v2, s2, v0, v0, v2
-// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
+// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2      ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f32 v2, s[2:3], v0, v0, v2
-// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
+// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2  ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
 // GFX10: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
@@ -223,7 +224,7 @@ v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
 // GFX10: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
 
 v_cmpx_neq_f32_e32 v0, v1
-// GFX10: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
+// GFX10: v_cmpx_neq_f32_e32 v0, v1               ; encoding: [0x00,0x03,0x3a,0x7c]
 
 v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
@@ -232,7 +233,7 @@ v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
 
 v_cmpx_class_f32_e64 v0, 1
-// GFX10: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
+// GFX10: v_cmpx_class_f32_e64 v0, 1              ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
 
 v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]

>From 86f3a94c8a520f58d2a9754c4fc53d744efcdfa8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 11:46:11 +0900
Subject: [PATCH 3/9] update wave_any test

---
 llvm/test/MC/AMDGPU/wave_any.s | 101 +++++++++++++++++----------------
 1 file changed, 51 insertions(+), 50 deletions(-)

diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s
index 3c265db30a324..15b235a92d68e 100644
--- a/llvm/test/MC/AMDGPU/wave_any.s
+++ b/llvm/test/MC/AMDGPU/wave_any.s
@@ -1,158 +1,159 @@
 // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6
-// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error: --check-prefixes=GFX10-ERR %s
 
 v_cmp_ge_i32_e32 s0, v0
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0         ; encoding: [0x00,0x00,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0            ; encoding: [0x00,0x00,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc_lo, s0, v1
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1         ; encoding: [0x00,0x02,0x0c,0x7d]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_ge_i32_e32 vcc, s0, v2
-// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2         ; encoding: [0x00,0x04,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2            ; encoding: [0x00,0x04,0x0c,0x7d]
 
 v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
-// GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
+// GFX10-ERR: :[[@LINE-1]]:19: error: invalid operand for instruction
 
 v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
 
 v_cmp_class_f32_e32 vcc_lo, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0      ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cmp_class_f32_e32 vcc, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0      ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc, s0, v0         ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
-// GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
+// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
 
 v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
 
 v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
-// GFX10: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
+// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
 
 v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
 
 v_cndmask_b32_e32 v1, v2, v3,
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc_lo
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_cndmask_b32_e32 v1, v2, v3, vcc
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo    ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x02]
 
 v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
 
 v_add_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
 
 v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
 
 v_sub_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
 
 v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
 
 v_subrev_co_ci_u32_e32 v1, 0, v1
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
+// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
+// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
+// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
 
 v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
+// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
 
 v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
 
 v_add_co_u32 v0, s0, v0, v2
-// GFX10: v_add_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
 
 v_add_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_add_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2  ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_sub_co_u32 v0, s0, v0, v2
-// GFX10: v_sub_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
 
 v_sub_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_sub_co_u32 v0, s0, v0, v2             ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
 
 v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2  ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_subrev_co_u32 v0, s0, v0, v2
-// GFX10: v_subrev_co_u32 v0, s0, v0, v2          ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction
 
 v_subrev_co_u32_e64 v0, s0, v0, v2
-// GFX10: v_subrev_co_u32 v0, s0, v0, v2          ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction
 
 v_add_co_u32 v0, s[0:1], v0, v2
 // GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
@@ -161,7 +162,7 @@ v_add_co_u32 v0, exec, v0, v2
 // GFX10: v_add_co_u32 v0, exec, v0, v2           ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32 v0, exec_lo, v0, v2
-// GFX10: v_add_co_u32 v0, exec_lo, v0, v2        ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
 
 v_add_co_u32_e64 v0, s[0:1], v0, v2
 // GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
@@ -188,37 +189,37 @@ v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
-// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3]
-// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
-// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
 // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
 
 v_div_scale_f32 v2, s2, v0, v0, v2
-// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2      ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
+// GFX10-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction
 
 v_div_scale_f32 v2, s[2:3], v0, v0, v2
 // GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2  ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
-// GFX10: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
+// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
 
 v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
 // GFX10: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
 
 v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
-// GFX10: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
+// GFX10-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction
 
 v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
 // GFX10: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
 
 v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
-// GFX10: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
+// GFX10-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction
 
 v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
 // GFX10: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]

>From 3fb03407d2a5c685b81215b63d2cb1fc10b8829c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 17:49:14 +0900
Subject: [PATCH 4/9] replace xor

---
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 0ea5ad7ccaea4..03d12e3899722 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -104,7 +104,7 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
     STI->ToggleFeature(AMDGPU::FeatureWavefrontSize32);
   }
 
-  assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) ^
+  assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
           STI->hasFeature(AMDGPU::FeatureWavefrontSize32)) &&
          "wavesize features are mutually exclusive");
 

>From 5363f0a34e31b2354cbb409ab00ea74f26518616 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 17:49:25 +0900
Subject: [PATCH 5/9] Test disassemble with both modes active

---
 llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
index 2156a682337e8..336f4b2e88f47 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vopc.txt
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
 # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W64 %s
-
+# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -disassemble -show-encoding < %s | FileCheck -strict-whitespace -check-prefix=W32 %s
 
 # W32: v_cmp_class_f32_e32 vcc_lo, -1, v2      ; encoding: [0xc1,0x04,0x10,0x7d]
 # W64: v_cmp_class_f32_e32 vcc, -1, v2         ; encoding: [0xc1,0x04,0x10,0x7d]

>From 48ea8fe972cad5d10d28169e50e72beabc24428b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 17:49:47 +0900
Subject: [PATCH 6/9] Preserve multiple set assembler behavior with
 assembler-only handling

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  13 ++
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  33 +++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   1 +
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp       |  12 +-
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |   6 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   5 +
 llvm/test/MC/AMDGPU/wave_any.s                | 122 +++++++++---------
 7 files changed, 112 insertions(+), 80 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index b2d1011eb506c..5d33164c1aba7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,6 +1238,19 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
+// Ugly hack to accomodate an assembling modules with mixed
+// wavesizes. Ideally we would have a mapping symbol in assembly which
+// would keep track of which sections of code should be treated as
+// wave32 and wave64. Instead what users do is assemble with both
+// wavesizes enabled. We translate this into this special mode so this
+// only influences assembler behavior and nothing else.
+def FeatureAssemblerPermissiveWavesize : SubtargetFeature<
+  "assembler-permissive-wavesize",
+  "AssemblerPermissiveWavesize",
+  "true",
+  "allow parsing wave32 and wave64 variants of instructions"
+>;
+
 class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
   "max-private-element-size-"#size,
   "MaxPrivateElementSize",
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 21dfdfd6bed04..dfbde85231a6e 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1246,6 +1246,12 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
 // AsmParser
 //===----------------------------------------------------------------------===//
 
+// TODO: define GET_SUBTARGET_FEATURE_NAME
+#define GET_REGISTER_MATCHER
+#include "AMDGPUGenAsmMatcher.inc"
+#undef GET_REGISTER_MATCHER
+#undef GET_SUBTARGET_FEATURE_NAME
+
 // Holds info related to the current kernel, e.g. count of SGPRs used.
 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
 // .amdgpu_hsa_kernel or at EOF.
@@ -1536,6 +1542,10 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
     return AMDGPU::isGFX10_BEncoding(getSTI());
   }
 
+  bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
+
+  bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
+
   bool hasInv2PiInlineImm() const {
     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
   }
@@ -1603,6 +1613,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
     return &MII;
   }
 
+  // FIXME: This should not be used. Instead, should use queries derived from
+  // getAvailableFeatures().
   const FeatureBitset &getFeatureBits() const {
     return getSTI().getFeatureBits();
   }
@@ -2259,9 +2271,8 @@ bool AMDGPUOperand::isSDWAInt32Operand() const {
 }
 
 bool AMDGPUOperand::isBoolReg() const {
-  auto FB = AsmParser->getFeatureBits();
-  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrc_b64()) ||
-                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrc_b32()));
+  return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
+                     (AsmParser->isWave32() && isSCSrc_b32()));
 }
 
 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
@@ -5025,9 +5036,8 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
 
 // Check if VCC register matches wavefront size
 bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
-  auto FB = getFeatureBits();
-  return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
-    (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
+  return (Reg == AMDGPU::VCC && isWave64()) ||
+         (Reg == AMDGPU::VCC_LO && isWave32());
 }
 
 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
@@ -5717,7 +5727,7 @@ bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
   // Check if this instruction may be used with a different wavesize.
   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
-
+    // FIXME: Use getAvailableFeatures, and do not manually recompute
     FeatureBitset FeaturesWS32 = getFeatureBits();
     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
         .flip(AMDGPU::FeatureWavefrontSize32);
@@ -6472,10 +6482,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
     if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
       if (!isGFX10Plus())
         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
-      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+      if (!isWave32())
         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
     } else {
-      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+      if (!isWave64())
         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
     }
   }
@@ -6484,10 +6494,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
     if (C.wavefront_size == 5) {
       if (!isGFX10Plus())
         return TokError("wavefront_size=5 is only allowed on GFX10+");
-      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+      if (!isWave32())
         return TokError("wavefront_size=5 requires +WavefrontSize32");
     } else if (C.wavefront_size == 6) {
-      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+      if (!isWave64())
         return TokError("wavefront_size=6 requires +WavefrontSize64");
     }
   }
@@ -10390,7 +10400,6 @@ LLVMInitializeAMDGPUAsmParser() {
   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
 }
 
-#define GET_REGISTER_MATCHER
 #define GET_MATCHER_IMPLEMENTATION
 #define GET_MNEMONIC_SPELL_CHECKER
 #define GET_MNEMONIC_CHECKER
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f5367f3b88920..a54d6651c25c1 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -99,6 +99,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool EnableDS128 = false;
   bool EnablePRTStrictNull = false;
   bool DumpCode = false;
+  bool AssemblerPermissiveWavesize = false;
 
   // Subtarget statically properties set by tablegen
   bool FP64 = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 03d12e3899722..281a65799bdb2 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -98,10 +98,14 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
                            : AMDGPU::FeatureWavefrontSize64);
   } else if (IsWave64 && IsWave32) {
     // The wave size is mutually exclusive. If both somehow end up set, wave64
-    // wins.
-    //
-    // FIXME: This should really just be an error.
-    STI->ToggleFeature(AMDGPU::FeatureWavefrontSize32);
+    // wins if supported.
+    STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
+                           ? AMDGPU::FeatureWavefrontSize64
+                           : AMDGPU::FeatureWavefrontSize32);
+
+    // If both wavesizes were manually requested, hack in a feature to permit
+    // assembling modules with mixed wavesizes.
+    STI->ToggleFeature(AMDGPU::FeatureAssemblerPermissiveWavesize);
   }
 
   assert((STI->hasFeature(AMDGPU::FeatureWavefrontSize64) !=
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index fb2cd04b364d7..18a53931a6390 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -7,9 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 def isWave32 : Predicate<"Subtarget->isWave32()">,
-  AssemblerPredicate <(all_of FeatureWavefrontSize32)>;
+  AssemblerPredicate <(any_of FeatureWavefrontSize32,
+                              FeatureAssemblerPermissiveWavesize)>;
 def isWave64 : Predicate<"Subtarget->isWave64()">,
-  AssemblerPredicate <(all_of FeatureWavefrontSize64)>;
+  AssemblerPredicate <(any_of FeatureWavefrontSize64,
+                              FeatureAssemblerPermissiveWavesize)>;
 
 class AMDGPUMnemonicAlias<string From, string To, string VariantName = "">
     : MnemonicAlias<From, To, VariantName>, PredicateControl;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 37b0262966160..746465448b4ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1568,6 +1568,11 @@ bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
 bool hasMAIInsts(const MCSubtargetInfo &STI);
 bool hasVOPD(const MCSubtargetInfo &STI);
 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
+
+bool supportsWave32(const MCSubtargetInfo &STI) {
+  return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
+}
+
 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST);
diff --git a/llvm/test/MC/AMDGPU/wave_any.s b/llvm/test/MC/AMDGPU/wave_any.s
index 15b235a92d68e..27502eff89bfc 100644
--- a/llvm/test/MC/AMDGPU/wave_any.s
+++ b/llvm/test/MC/AMDGPU/wave_any.s
@@ -1,231 +1,229 @@
-// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefixes=GFX10 %s
-// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error: --check-prefixes=GFX10-ERR %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
 
 v_cmp_ge_i32_e32 s0, v0
-// GFX10: v_cmp_ge_i32_e32 vcc, s0, v0            ; encoding: [0x00,0x00,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc_lo, s0, v1
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v1 ; encoding: [0x00,0x02,0x0c,0x7d]
 
 v_cmp_ge_i32_e32 vcc, s0, v2
-// GFX10: v_cmp_ge_i32_e32 vcc, s0, v2            ; encoding: [0x00,0x04,0x0c,0x7d]
+// GFX10: v_cmp_ge_i32_e32 vcc_lo, s0, v2 ; encoding: [0x00,0x04,0x0c,0x7d]
 
 v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:19: error: invalid operand for instruction
+// GFX10: v_cmp_le_f16_sdwa s0, v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
 
 v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmp_le_f16_sdwa s[0:1], v3, v4 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x08,0x96,0x7d,0x03,0x80,0x05,0x06]
 
 v_cmp_class_f32_e32 vcc_lo, s0, v0
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f32_e32 vcc, s0, v0
-// GFX10: v_cmp_class_f32_e32 vcc, s0, v0         ; encoding: [0x00,0x00,0x10,0x7d]
+// GFX10: v_cmp_class_f32_e32 vcc_lo, s0, v0 ; encoding: [0x00,0x00,0x10,0x7d]
 
 v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
+// GFX10: v_cmp_class_f16_sdwa vcc_lo, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
 
 v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa vcc, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x00,0x06,0x06]
 
 v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
+// GFX10: v_cmp_class_f16_sdwa s0, v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
 
 v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD
 // GFX10: v_cmp_class_f16_sdwa s[0:1], v1, v2 src0_sel:DWORD src1_sel:DWORD ; encoding: [0xf9,0x04,0x1e,0x7d,0x01,0x80,0x06,0x06]
 
 v_cndmask_b32_e32 v1, v2, v3,
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc_lo
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_cndmask_b32_e32 v1, v2, v3, vcc
-// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc       ; encoding: [0x02,0x07,0x02,0x02]
+// GFX10: v_cndmask_b32_e32 v1, v2, v3, vcc_lo ; encoding: [0x02,0x07,0x02,0x02]
 
 v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_add_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_add_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x50]
+// GFX10: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x50]
 
 v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_sub_co_ci_u32_e32 v3, v3, v4
-// GFX10: v_sub_co_ci_u32_e32 v3, vcc, v3, v4, vcc ; encoding: [0x03,0x09,0x06,0x52]
+// GFX10: v_sub_co_ci_u32_e32 v3, vcc_lo, v3, v4, vcc_lo ; encoding: [0x03,0x09,0x06,0x52]
 
 v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_subrev_co_ci_u32_e32 v1, 0, v1
-// GFX10: v_subrev_co_ci_u32_e32 v1, vcc, 0, v1, vcc ; encoding: [0x80,0x02,0x02,0x54]
+// GFX10: v_subrev_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo ; encoding: [0x80,0x02,0x02,0x54]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_sub_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_sub_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
+// GFX10: v_sub_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x52,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_subrev_co_ci_u32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc, v1, v4, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
+// GFX10: v_subrev_co_ci_u32_sdwa v1, vcc_lo, v1, v4, vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x54,0x01,0x06,0x00,0x06]
 
 v_add_co_ci_u32 v1, sext(v1), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_add_co_ci_u32_sdwa v1, vcc_lo, sext(v1), sext(v4), vcc_lo dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
 // GFX10: v_add_co_ci_u32_sdwa v1, vcc, sext(v1), sext(v4), vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; encoding: [0xf9,0x08,0x02,0x50,0x01,0x06,0x08,0x0e]
 
 v_add_co_ci_u32_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
+// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_add_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_add_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x50,0x01,0xe4,0x00,0x00]
 
 v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_sub_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
 
 v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_sub_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x52,0x01,0xe4,0x00,0x00]
 
 v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
-// GFX10-ERR: :[[@LINE-1]]:1: error: operands are not valid for this GPU or mode
+// GFX10: v_subrev_co_ci_u32_dpp v5, vcc_lo, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
 
 v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0
 // GFX10: v_subrev_co_ci_u32_dpp v5, vcc, v1, v2, vcc quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0xfa,0x04,0x0a,0x54,0x01,0xe4,0x00,0x00]
 
 v_add_co_u32 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
+// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32_e64 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
+// GFX10: v_add_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_sub_co_u32 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
+// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_u32_e64 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:22: error: invalid operand for instruction
+// GFX10: v_sub_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_sub_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_subrev_co_u32 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_u32_e64 v0, s0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_subrev_co_u32 v0, s0, v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2
-// GFX10-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction
+// GFX10: v_subrev_co_ci_u32_e64 v4, s0, v1, v5, s2 ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32 v0, exec, v0, v2
-// GFX10: v_add_co_u32 v0, exec, v0, v2           ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, exec, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32 v0, exec_lo, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:18: error: invalid operand for instruction
+// GFX10: v_add_co_u32 v0, exec_lo, v0, v2 ; encoding: [0x00,0x7e,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_add_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_add_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x0f,0xd7,0x00,0x05,0x02,0x00]
 
 v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_sub_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2         ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_sub_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x10,0xd7,0x00,0x05,0x02,0x00]
 
 v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_sub_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x29,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_subrev_co_u32 v0, s[0:1], v0, v2
-// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2      ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_u32_e64 v0, s[0:1], v0, v2
-// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2      ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
+// GFX10: v_subrev_co_u32 v0, s[0:1], v0, v2 ; encoding: [0x00,0x00,0x19,0xd7,0x00,0x05,0x02,0x00]
 
 v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3]
 // GFX10: v_subrev_co_ci_u32_e64 v4, s[0:1], v1, v5, s[2:3] ; encoding: [0x04,0x00,0x2a,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s2 ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3]
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_add_co_ci_u32_e64 v4, vcc_lo, v1, v5, s[2:3] ; encoding: [0x04,0x6a,0x28,0xd5,0x01,0x0b,0x0a,0x00]
 
 v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_add_co_ci_u32_e64 v4, s0, v1, v5, vcc_lo ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
 
 v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc
 // GFX10: v_add_co_ci_u32_e64 v4, s[0:1], v1, v5, vcc ; encoding: [0x04,0x00,0x28,0xd5,0x01,0x0b,0xaa,0x01]
 
 v_div_scale_f32 v2, s2, v0, v0, v2
-// GFX10-ERR: :[[@LINE-1]]:21: error: invalid operand for instruction
+// GFX10: v_div_scale_f32 v2, s2, v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f32 v2, s[2:3], v0, v0, v2
-// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2  ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
+// GFX10: v_div_scale_f32 v2, s[2:3], v0, v0, v2 ; encoding: [0x02,0x02,0x6d,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3]
-// GFX10-ERR: :[[@LINE-1]]:25: error: invalid operand for instruction
+// GFX10: v_div_scale_f64 v[2:3], s2, v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
 
 v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3]
 // GFX10: v_div_scale_f64 v[2:3], s[2:3], v[0:1], v[0:1], v[2:3] ; encoding: [0x02,0x02,0x6e,0xd5,0x00,0x01,0x0a,0x04]
 
 v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3]
-// GFX10-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction
+// GFX10: v_mad_i64_i32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
 
 v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3]
 // GFX10: v_mad_i64_i32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x77,0xd5,0x00,0x03,0x0a,0x04]
 
 v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3]
-// GFX10-ERR: :[[@LINE-1]]:23: error: invalid operand for instruction
+// GFX10: v_mad_u64_u32 v[0:1], s6, v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
 
 v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3]
 // GFX10: v_mad_u64_u32 v[0:1], s[6:7], v0, v1, v[2:3] ; encoding: [0x00,0x06,0x76,0xd5,0x00,0x03,0x0a,0x04]
 
 v_cmpx_neq_f32_e32 v0, v1
-// GFX10: v_cmpx_neq_f32_e32 v0, v1               ; encoding: [0x00,0x03,0x3a,0x7c]
+// GFX10: v_cmpx_neq_f32_e32 v0, v1 ; encoding: [0x00,0x03,0x3a,0x7c]
 
 v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_neq_f32_sdwa v0, v1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x3a,0x7c,0x00,0x00,0x05,0x06]
@@ -234,7 +232,7 @@ v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_eq_u32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0xa5,0x7d,0x00,0x00,0x05,0x86]
 
 v_cmpx_class_f32_e64 v0, 1
-// GFX10: v_cmpx_class_f32_e64 v0, 1              ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
+// GFX10: v_cmpx_class_f32_e64 v0, 1 ; encoding: [0x7e,0x00,0x98,0xd4,0x00,0x03,0x01,0x00]
 
 v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD
 // GFX10: v_cmpx_class_f32_sdwa v0, 1 src0_sel:WORD_1 src1_sel:DWORD ; encoding: [0xf9,0x02,0x31,0x7d,0x00,0x00,0x05,0x86]

>From 294ce1db8601519fea15aab5cc8f356fc12c306a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 17 Sep 2025 20:19:34 +0900
Subject: [PATCH 7/9] inline

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 746465448b4ed..2b9c063f42a5e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1569,7 +1569,7 @@ bool hasMAIInsts(const MCSubtargetInfo &STI);
 bool hasVOPD(const MCSubtargetInfo &STI);
 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
 
-bool supportsWave32(const MCSubtargetInfo &STI) {
+inline bool supportsWave32(const MCSubtargetInfo &STI) {
   return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
 }
 

>From 8e657608d96354632098576f2e9de9dbfa60d8a5 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 25 Sep 2025 17:26:18 +0900
Subject: [PATCH 8/9] Fix comments

---
 llvm/lib/Target/AMDGPU/AMDGPU.td                           | 2 +-
 llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2 +-
 llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5d33164c1aba7..eaa1870f4be28 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1238,7 +1238,7 @@ def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
 // Subtarget Features (options and debugging)
 //===------------------------------------------------------------===//
 
-// Ugly hack to accomodate an assembling modules with mixed
+// Ugly hack to accommodate assembling modules with mixed
 // wavesizes. Ideally we would have a mapping symbol in assembly which
 // would keep track of which sections of code should be treated as
 // wave32 and wave64. Instead what users do is assemble with both
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 281a65799bdb2..013cfeb364048 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -97,7 +97,7 @@ createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
                            ? AMDGPU::FeatureWavefrontSize32
                            : AMDGPU::FeatureWavefrontSize64);
   } else if (IsWave64 && IsWave32) {
-    // The wave size is mutually exclusive. If both somehow end up set, wave64
+    // The wave size is mutually exclusive. If both somehow end up set, wave32
     // wins if supported.
     STI->ToggleFeature(AMDGPU::supportsWave32(*STI)
                            ? AMDGPU::FeatureWavefrontSize64
diff --git a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
index 8fc7b7fb05f0c..a56541796b6ca 100644
--- a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
+++ b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
@@ -1,7 +1,7 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s
 
-// Both are supported, but not at the same time
+// Both sure setting both modes is supported at the same time.
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s
 
 // Test that there is no assertion when using an explicit

>From 36f07bfdb41e642c988f3eb65269f27fdbc4711c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 25 Sep 2025 18:16:38 +0900
Subject: [PATCH 9/9] Update
 llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s

---
 llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
index a56541796b6ca..3a8656c392ff5 100644
--- a/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
+++ b/llvm/test/MC/AMDGPU/wavesize-feature-unsupported-target.s
@@ -1,7 +1,7 @@
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64 -o - %s | FileCheck -check-prefix=GFX1250 %s
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -mattr=+wavefrontsize32 -o - %s | FileCheck -check-prefix=GFX900 %s
 
-// Both sure setting both modes is supported at the same time.
+// Make sure setting both modes is supported at the same time.
 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,+wavefrontsize64 %s | FileCheck -check-prefixes=GFX10 %s
 
 // Test that there is no assertion when using an explicit



More information about the llvm-commits mailing list