[llvm] [AMDGPU] Check vector sizes for physical register constraints in inline asm (PR #109955)

Fabian Ritter via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 25 23:41:51 PDT 2024


https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/109955

>From 888bb396a89593e545eca88de9c0e463e5955d50 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Wed, 25 Sep 2024 06:32:23 -0400
Subject: [PATCH 1/3] [AMDGPU] Check vector sizes for physical register
 constraints in inline asm

For register constraints that require specific register ranges, the width of
the range should match the size of the type of the associated parameter/return
value. With this PR, we report an error when that is not the case. Previously,
these cases would hit assertions or llvm_unreachable calls.

The handling of register constraints that require only a single register
remains more lenient to allow narrower non-vector types for the associated IR
values. For example, constraining an i16 or i8 value to a 32-bit register is
still allowed.

Fixes #101190.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   7 +
 .../GlobalISel/inline-asm-mismatched-size.ll  |  44 ++---
 .../AMDGPU/inlineasm-mismatched-size.ll       | 157 ++++++++++++++++++
 3 files changed, 189 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 08f2ff4566b674..eb8885577ca19d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15500,6 +15500,10 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
         Failed |= !RegName.consume_back("]");
         if (!Failed) {
           uint32_t Width = (End - Idx + 1) * 32;
+          // Prohibit constraints for register ranges with a width that does not
+          // match the required type.
+          if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits())
+            return std::pair(0U, nullptr);
           MCRegister Reg = RC->getRegister(Idx);
           if (SIRegisterInfo::isVGPRClass(RC))
             RC = TRI->getVGPRClassForBitWidth(Width);
@@ -15513,6 +15517,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
           }
         }
       } else {
+        // Check for lossy scalar/vector conversions.
+        if (VT.isVector() && VT.getSizeInBits() != 32)
+          return std::pair(0U, nullptr);
         bool Failed = RegName.getAsInteger(10, Idx);
         if (!Failed && Idx < RC->getNumRegs())
           return std::pair(RC->getRegister(Idx), RC);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
index 696cbdb75f1ed9..69567b34ae6e60 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
@@ -3,11 +3,14 @@
 ; RUN: FileCheck -check-prefix=ERR %s < %t
 
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}"()' (in function: return_type_is_too_small_vector)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_scalar)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)' (in function: use_scalar_too_big)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg)' (in function: use_pointer_too_small)
 ; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg)' (in function: use_pointer_too_big)
 
@@ -24,18 +27,25 @@ define amdgpu_kernel void @return_type_is_too_big_vector() {
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
   %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" ()
   call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
   ret void
 }
 
-; FIXME: This is crashing in the DAG
-; define amdgpu_kernel void @return_type_is_too_small_vector() {
-;   %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
-;   call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
-;   ret void
-; }
+; This is invalid because it requests three 32-bit SGPRs to hold a <4 x i32> result.
+define amdgpu_kernel void @return_type_is_too_small_vector() {
+  ; CHECK-LABEL: name: return_type_is_too_small_vector
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
+  ret void
+}
 
 define i64 @return_type_is_too_big_scalar() {
   ; CHECK-LABEL: name: return_type_is_too_big_scalar
@@ -50,12 +60,10 @@ define i64 @return_type_is_too_big_scalar() {
 
 define i32 @return_type_is_too_small_scalar() {
   ; CHECK-LABEL: name: return_type_is_too_small_scalar
-  ; CHECK: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
-  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
-  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
-  ; CHECK-NEXT:   $vgpr0 = COPY [[TRUNC]](s32)
-  ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
   %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" ()
   ret i32 %reg
 }
@@ -77,7 +85,6 @@ define ptr addrspace(3) @return_type_is_too_small_pointer() {
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1 (%ir-block.0):
-  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
   %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}" ()
   ret ptr addrspace(3) %reg
 }
@@ -141,14 +148,13 @@ define void @use_scalar_too_small(i64 %arg) {
 
 define void @use_scalar_too_big(i32 %arg) {
   ; CHECK-LABEL: name: use_scalar_too_big
-  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
-  ; CHECK-NEXT:   $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
-  ; CHECK-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1
-  ; CHECK-NEXT:   SI_RETURN
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
   call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
new file mode 100644
index 00000000000000..abfb138936fea5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
@@ -0,0 +1,157 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; Diagnose register constraints that are not wide enough.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8v_def() {
+  %asm = call <9 x i32> asm sideeffect "; def $0", "={v[8:15]}"()
+  ret <9 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:15]}'
+define void @inline_asm_9xi32_in_8v_use(<9 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v[8:15]}"(<9 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8s_def() {
+  %asm = call <9 x i32> asm sideeffect "; def $0", "={s[8:15]}"()
+  ret <9 x i32> %asm
+}
+
+
+; Diagnose register constraints that are too wide.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9v_def() {
+  %asm = call <8 x i32> asm sideeffect "; def $0", "={v[8:16]}"()
+  ret <8 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:16]}'
+define void @inline_asm_8xi32_in_9v_use(<8 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v[8:16]}"(<8 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9s_def() {
+  %asm = call <8 x i32> asm sideeffect "; def $0", "={s[8:16]}"()
+  ret <8 x i32> %asm
+}
+
+
+; Diagnose mismatched scalars with register ranges
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:5]}'
+define void @inline_asm_scalar_read_too_wide() {
+  %asm = call i32 asm sideeffect "; def $0 ", "={s[4:5]}"()
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:4]}'
+define void @inline_asm_scalar_read_too_narrow() {
+  %asm = call i64 asm sideeffect "; def $0 ", "={s[4:4]}"()
+  ret void
+}
+
+
+; Be more lenient with single registers that are too wide for the IR type:
+
+; ERR-NOT: error
+define i16 @inline_asm_i16_in_v_def() {
+  %asm = call i16 asm sideeffect "; def $0", "={v8}"()
+  ret i16 %asm
+}
+
+; ERR-NOT: error
+define void @inline_asm_i16_in_v_use(i16 %val) {
+  call void asm sideeffect "; use $0", "{v8}"(i16 %val)
+  ret void
+}
+
+; ERR-NOT: error
+define i16 @inline_asm_i16_in_s_def() {
+  %asm = call i16 asm sideeffect "; def $0", "={s8}"()
+  ret i16 %asm
+}
+
+; ERR-NOT: error
+define i8 @inline_asm_i8_in_v_def() {
+  %asm = call i8 asm sideeffect "; def $0", "={v8}"()
+  ret i8 %asm
+}
+
+; ERR-NOT: error
+define void @inline_asm_i8_in_v_use(i8 %val) {
+  call void asm sideeffect "; use $0", "{v8}"(i8 %val)
+  ret void
+}
+
+; ERR-NOT: error
+define i8 @inline_asm_i8_in_s_def() {
+  %asm = call i8 asm sideeffect "; def $0", "={s8}"()
+  ret i8 %asm
+}
+
+
+; Single registers for vector types that are too wide or too narrow should be
+; diagnosed.
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+define void @inline_asm_4xi32_in_v_use(<4 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v8}"(<4 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+define <4 x i32> @inline_asm_4xi32_in_v_def() {
+  %asm = call <4 x i32> asm sideeffect "; def $0", "={v8}"()
+  ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+define <4 x i32> @inline_asm_4xi32_in_s_def() {
+  %asm = call <4 x i32> asm sideeffect "; def $0", "={s8}"()
+  ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+define void @inline_asm_2xi8_in_v_use(<2 x i8> %val) {
+  call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+define <2 x i8> @inline_asm_2xi8_in_v_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "={v8}"()
+  ret <2 x i8> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+define <2 x i8> @inline_asm_2xi8_in_s_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "={s8}"()
+  ret <2 x i8> %asm
+}
+
+
+; Single registers for vector types that fit are fine.
+
+; ERR-NOT: error
+define void @inline_asm_2xi16_in_v_use(<2 x i16> %val) {
+  call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %val)
+  ret void
+}
+
+; ERR-NOT: error
+define <2 x i16> @inline_asm_2xi16_in_v_def() {
+  %asm = call <2 x i16> asm sideeffect "; def $0", "={v8}"()
+  ret <2 x i16> %asm
+}
+
+; ERR-NOT: error
+define <2 x i16> @inline_asm_2xi16_in_s_def() {
+  %asm = call <2 x i16> asm sideeffect "; def $0", "={s8}"()
+  ret <2 x i16> %asm
+}

>From 0611cf6067f214486c7cdda609863c3a5b369129 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Wed, 25 Sep 2024 08:33:42 -0400
Subject: [PATCH 2/3] fixup! [AMDGPU] Check vector sizes for physical register
 constraints in inline asm

---
 .../AMDGPU/inlineasm-mismatched-size-error.ll | 114 +++++++
 .../AMDGPU/inlineasm-mismatched-size.ll       | 285 +++++++++++-------
 2 files changed, 287 insertions(+), 112 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll

diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
new file mode 100644
index 00000000000000..6dde02ee9853c2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
@@ -0,0 +1,114 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; Diagnose register constraints that are not wide enough.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8v_def() {
+  %asm = call <9 x i32> asm sideeffect "; def $0", "={v[8:15]}"()
+  ret <9 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:15]}'
+define void @inline_asm_9xi32_in_8v_use(<9 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v[8:15]}"(<9 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8s_def() {
+  %asm = call <9 x i32> asm sideeffect "; def $0", "={s[8:15]}"()
+  ret <9 x i32> %asm
+}
+
+
+; Diagnose register constraints that are too wide.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9v_def() {
+  %asm = call <8 x i32> asm sideeffect "; def $0", "={v[8:16]}"()
+  ret <8 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:16]}'
+define void @inline_asm_8xi32_in_9v_use(<8 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v[8:16]}"(<8 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9s_def() {
+  %asm = call <8 x i32> asm sideeffect "; def $0", "={s[8:16]}"()
+  ret <8 x i32> %asm
+}
+
+
+; Diagnose mismatched scalars with register ranges
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:5]}'
+define void @inline_asm_scalar_read_too_wide() {
+  %asm = call i32 asm sideeffect "; def $0 ", "={s[4:5]}"()
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:4]}'
+define void @inline_asm_scalar_read_too_narrow() {
+  %asm = call i64 asm sideeffect "; def $0 ", "={s[4:4]}"()
+  ret void
+}
+
+; Single registers for vector types that are too wide or too narrow should be
+; diagnosed.
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+define void @inline_asm_4xi32_in_v_use(<4 x i32> %val) {
+  call void asm sideeffect "; use $0", "{v8}"(<4 x i32> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+define <4 x i32> @inline_asm_4xi32_in_v_def() {
+  %asm = call <4 x i32> asm sideeffect "; def $0", "={v8}"()
+  ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+define <4 x i32> @inline_asm_4xi32_in_s_def() {
+  %asm = call <4 x i32> asm sideeffect "; def $0", "={s8}"()
+  ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+define void @inline_asm_2xi8_in_v_use(<2 x i8> %val) {
+  call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate input reg for constraint 'v'
+define void @inline_asm_2xi8_in_vvirt_use(<2 x i8> %val) {
+  call void asm sideeffect "; use $0", "v"(<2 x i8> %val)
+  ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+define <2 x i8> @inline_asm_2xi8_in_v_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "={v8}"()
+  ret <2 x i8> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint 'v'
+define <2 x i8> @inline_asm_2xi8_in_vvirt_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "=v"()
+  ret <2 x i8> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+define <2 x i8> @inline_asm_2xi8_in_s_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "={s8}"()
+  ret <2 x i8> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint 's'
+define <2 x i8> @inline_asm_2xi8_in_svirt_def() {
+  %asm = call <2 x i8> asm sideeffect "; def $0", "=s"()
+  ret <2 x i8> %asm
+}
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
index abfb138936fea5..2691ec79cf35b3 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
@@ -1,157 +1,218 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=ERR %s
-
-; Diagnose register constraints that are not wide enough.
-
-; ERR: error: couldn't allocate output register for constraint '{v[8:15]}'
-define <9 x i32> @inline_asm_9xi32_in_8v_def() {
-  %asm = call <9 x i32> asm sideeffect "; def $0", "={v[8:15]}"()
-  ret <9 x i32> %asm
-}
-
-; ERR: error: couldn't allocate input reg for constraint '{v[8:15]}'
-define void @inline_asm_9xi32_in_8v_use(<9 x i32> %val) {
-  call void asm sideeffect "; use $0", "{v[8:15]}"(<9 x i32> %val)
-  ret void
-}
-
-; ERR: error: couldn't allocate output register for constraint '{s[8:15]}'
-define <9 x i32> @inline_asm_9xi32_in_8s_def() {
-  %asm = call <9 x i32> asm sideeffect "; def $0", "={s[8:15]}"()
-  ret <9 x i32> %asm
-}
-
-
-; Diagnose register constraints that are too wide.
-
-; ERR: error: couldn't allocate output register for constraint '{v[8:16]}'
-define <8 x i32> @inline_asm_8xi32_in_9v_def() {
-  %asm = call <8 x i32> asm sideeffect "; def $0", "={v[8:16]}"()
-  ret <8 x i32> %asm
-}
-
-; ERR: error: couldn't allocate input reg for constraint '{v[8:16]}'
-define void @inline_asm_8xi32_in_9v_use(<8 x i32> %val) {
-  call void asm sideeffect "; use $0", "{v[8:16]}"(<8 x i32> %val)
-  ret void
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=CHECK %s
+
+; Allow single registers that are too wide for the IR type:
+
+define i16 @inline_asm_i16_in_vphys_def() {
+; CHECK-LABEL: inline_asm_i16_in_vphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, v8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call i16 asm sideeffect "; def $0", "={v8}"()
+  ret i16 %asm
 }
 
-; ERR: error: couldn't allocate output register for constraint '{s[8:16]}'
-define <8 x i32> @inline_asm_8xi32_in_9s_def() {
-  %asm = call <8 x i32> asm sideeffect "; def $0", "={s[8:16]}"()
-  ret <8 x i32> %asm
+define i16 @inline_asm_i16_in_vvirt_def() {
+; CHECK-LABEL: inline_asm_i16_in_vvirt_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call i16 asm sideeffect "; def $0", "=v"()
+  ret i16 %asm
 }
 
-
-; Diagnose mismatched scalars with register ranges
-
-; ERR: error: couldn't allocate output register for constraint '{s[4:5]}'
-define void @inline_asm_scalar_read_too_wide() {
-  %asm = call i32 asm sideeffect "; def $0 ", "={s[4:5]}"()
+define void @inline_asm_i16_in_vphys_use(i16 %val) {
+; CHECK-LABEL: inline_asm_i16_in_vphys_use:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v8, 0xffff, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  call void asm sideeffect "; use $0", "{v8}"(i16 %val)
   ret void
 }
 
-; ERR: error: couldn't allocate output register for constraint '{s[4:4]}'
-define void @inline_asm_scalar_read_too_narrow() {
-  %asm = call i64 asm sideeffect "; def $0 ", "={s[4:4]}"()
+define void @inline_asm_i16_in_vvirt_use(i16 %val) {
+; CHECK-LABEL: inline_asm_i16_in_vvirt_use:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  call void asm sideeffect "; use $0", "v"(i16 %val)
   ret void
 }
 
-
-; Be more lenient with single registers that are too wide for the IR type:
-
-; ERR-NOT: error
-define i16 @inline_asm_i16_in_v_def() {
-  %asm = call i16 asm sideeffect "; def $0", "={v8}"()
+define i16 @inline_asm_i16_in_sphys_def() {
+; CHECK-LABEL: inline_asm_i16_in_sphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call i16 asm sideeffect "; def $0", "={s8}"()
   ret i16 %asm
 }
 
-; ERR-NOT: error
-define void @inline_asm_i16_in_v_use(i16 %val) {
-  call void asm sideeffect "; use $0", "{v8}"(i16 %val)
-  ret void
-}
-
-; ERR-NOT: error
-define i16 @inline_asm_i16_in_s_def() {
-  %asm = call i16 asm sideeffect "; def $0", "={s8}"()
+define i16 @inline_asm_i16_in_svirt_def() {
+; CHECK-LABEL: inline_asm_i16_in_svirt_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call i16 asm sideeffect "; def $0", "=s"()
   ret i16 %asm
 }
 
-; ERR-NOT: error
-define i8 @inline_asm_i8_in_v_def() {
+define i8 @inline_asm_i8_in_vphys_def() {
+; CHECK-LABEL: inline_asm_i8_in_vphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, v8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %asm = call i8 asm sideeffect "; def $0", "={v8}"()
   ret i8 %asm
 }
 
-; ERR-NOT: error
-define void @inline_asm_i8_in_v_use(i8 %val) {
+; currently fails
+; define i8 @inline_asm_i8_in_vvirt_def() {
+;   %asm = call i8 asm sideeffect "; def $0", "=v"()
+;   ret i8 %asm
+; }
+
+; FIXME: Currently broken: v8 should be set to v0 & 0xFF.
+define void @inline_asm_i8_in_vphys_use(i8 %val) {
+; CHECK-LABEL: inline_asm_i8_in_vphys_use:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v8, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   call void asm sideeffect "; use $0", "{v8}"(i8 %val)
   ret void
 }
 
-; ERR-NOT: error
-define i8 @inline_asm_i8_in_s_def() {
+; currently fails
+; define void @inline_asm_i8_in_vvirt_use(i8 %val) {
+;   call void asm sideeffect "; use $0", "v"(i8 %val)
+;   ret void
+; }
+
+define i8 @inline_asm_i8_in_sphys_def() {
+; CHECK-LABEL: inline_asm_i8_in_sphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %asm = call i8 asm sideeffect "; def $0", "={s8}"()
   ret i8 %asm
 }
 
+; currently fails
+; define i8 @inline_asm_i8_in_svirt_def() {
+;   %asm = call i8 asm sideeffect "; def $0", "=s"()
+;   ret i8 %asm
+; }
 
-; Single registers for vector types that are too wide or too narrow should be
-; diagnosed.
-
-; ERR: error: couldn't allocate input reg for constraint '{v8}'
-define void @inline_asm_4xi32_in_v_use(<4 x i32> %val) {
-  call void asm sideeffect "; use $0", "{v8}"(<4 x i32> %val)
-  ret void
-}
-
-; ERR: error: couldn't allocate output register for constraint '{v8}'
-define <4 x i32> @inline_asm_4xi32_in_v_def() {
-  %asm = call <4 x i32> asm sideeffect "; def $0", "={v8}"()
-  ret <4 x i32> %asm
-}
 
-; ERR: error: couldn't allocate output register for constraint '{s8}'
-define <4 x i32> @inline_asm_4xi32_in_s_def() {
-  %asm = call <4 x i32> asm sideeffect "; def $0", "={s8}"()
-  ret <4 x i32> %asm
-}
+; Single registers for vector types that fit are fine.
 
-; ERR: error: couldn't allocate input reg for constraint '{v8}'
-define void @inline_asm_2xi8_in_v_use(<2 x i8> %val) {
-  call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %val)
+define void @inline_asm_2xi16_in_vphys_use(<2 x i16> %val) {
+; CHECK-LABEL: inline_asm_2xi16_in_vphys_use:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v8, v0
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %val)
   ret void
 }
 
-; ERR: error: couldn't allocate output register for constraint '{v8}'
-define <2 x i8> @inline_asm_2xi8_in_v_def() {
-  %asm = call <2 x i8> asm sideeffect "; def $0", "={v8}"()
-  ret <2 x i8> %asm
+define void @inline_asm_2xi16_in_vvirt_use(<2 x i16> %val) {
+; CHECK-LABEL: inline_asm_2xi16_in_vvirt_use:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  call void asm sideeffect "; use $0", "v"(<2 x i16> %val)
+  ret void
 }
 
-; ERR: error: couldn't allocate output register for constraint '{s8}'
-define <2 x i8> @inline_asm_2xi8_in_s_def() {
-  %asm = call <2 x i8> asm sideeffect "; def $0", "={s8}"()
-  ret <2 x i8> %asm
+define <2 x i16> @inline_asm_2xi16_in_vphys_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_vphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, v8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call <2 x i16> asm sideeffect "; def $0", "={v8}"()
+  ret <2 x i16> %asm
 }
 
-
-; Single registers for vector types that fit are fine.
-
-; ERR-NOT: error
-define void @inline_asm_2xi16_in_v_use(<2 x i16> %val) {
-  call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %val)
-  ret void
+define <2 x i16> @inline_asm_2xi16_in_vvirt_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_vvirt_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call <2 x i16> asm sideeffect "; def $0", "=v"()
+  ret <2 x i16> %asm
 }
 
-; ERR-NOT: error
-define <2 x i16> @inline_asm_2xi16_in_v_def() {
-  %asm = call <2 x i16> asm sideeffect "; def $0", "={v8}"()
+define <2 x i16> @inline_asm_2xi16_in_sphys_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_sphys_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s8
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s8
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call <2 x i16> asm sideeffect "; def $0", "={s8}"()
   ret <2 x i16> %asm
 }
 
-; ERR-NOT: error
-define <2 x i16> @inline_asm_2xi16_in_s_def() {
-  %asm = call <2 x i16> asm sideeffect "; def $0", "={s8}"()
+define <2 x i16> @inline_asm_2xi16_in_svirt_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_svirt_def:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; def s4
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s4
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %asm = call <2 x i16> asm sideeffect "; def $0", "=s"()
   ret <2 x i16> %asm
 }

>From 4f4c2ca26aa39d3de5e3a94d3fc35bbbb2c2b0c5 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <ritter.x2a at gmail.com>
Date: Thu, 26 Sep 2024 08:41:43 +0200
Subject: [PATCH 3/3] Update
 llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
index 6dde02ee9853c2..48d1de98b5f76c 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
@@ -1,4 +1,4 @@
-; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s
 
 ; Diagnose register constraints that are not wide enough.
 



More information about the llvm-commits mailing list