[llvm] 3ba4092 - [AMDGPU] Check vector sizes for physical register constraints in inline asm (#109955)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 1 01:29:39 PDT 2024
Author: Fabian Ritter
Date: 2024-10-01T10:29:35+02:00
New Revision: 3ba4092c066b589d6c16fcca2f2826cd6f51140f
URL: https://github.com/llvm/llvm-project/commit/3ba4092c066b589d6c16fcca2f2826cd6f51140f
DIFF: https://github.com/llvm/llvm-project/commit/3ba4092c066b589d6c16fcca2f2826cd6f51140f.diff
LOG: [AMDGPU] Check vector sizes for physical register constraints in inline asm (#109955)
For register constraints that require specific register ranges, the
width of the range should match the type of the associated
parameter/return value. With this PR, we error out when that is not the
case. Previously, these cases would hit assertions or llvm_unreachables.
The handling of register constraints that require only a single register
remains more lenient to allow narrower non-vector types for the
associated IR values. For example, constraining an i16 or i8 value to a
32-bit register is still allowed.
Fixes #101190.
---------
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
Added:
llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
Modified:
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d559d0446b9d8f..5e4cf705cc9e47 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15510,6 +15510,10 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
Failed |= !RegName.consume_back("]");
if (!Failed) {
uint32_t Width = (End - Idx + 1) * 32;
+ // Prohibit constraints for register ranges with a width that does not
+ // match the required type.
+ if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits())
+ return std::pair(0U, nullptr);
MCRegister Reg = RC->getRegister(Idx);
if (SIRegisterInfo::isVGPRClass(RC))
RC = TRI->getVGPRClassForBitWidth(Width);
@@ -15523,6 +15527,9 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
}
}
} else {
+ // Check for lossy scalar/vector conversions.
+ if (VT.isVector() && VT.getSizeInBits() != 32)
+ return std::pair(0U, nullptr);
bool Failed = RegName.getAsInteger(10, Idx);
if (!Failed && Idx < RC->getNumRegs())
return std::pair(RC->getRegister(Idx), RC);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
index 696cbdb75f1ed9..69567b34ae6e60 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
@@ -3,11 +3,14 @@
; RUN: FileCheck -check-prefix=ERR %s < %t
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}"()' (in function: return_type_is_too_small_vector)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_scalar)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)' (in function: use_scalar_too_big)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg)' (in function: use_pointer_too_small)
; ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg)' (in function: use_pointer_too_big)
@@ -24,18 +27,25 @@ define amdgpu_kernel void @return_type_is_too_big_vector() {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
%sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" ()
call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
ret void
}
-; FIXME: This is crashing in the DAG
-; define amdgpu_kernel void @return_type_is_too_small_vector() {
-; %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
-; call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
-; ret void
-; }
+; This is broken because it requests 3 32-bit sgprs to handle a 4xi32 result.
+define amdgpu_kernel void @return_type_is_too_small_vector() {
+ ; CHECK-LABEL: name: return_type_is_too_small_vector
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
+ %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
+ call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
+ ret void
+}
define i64 @return_type_is_too_big_scalar() {
; CHECK-LABEL: name: return_type_is_too_big_scalar
@@ -50,12 +60,10 @@ define i64 @return_type_is_too_big_scalar() {
define i32 @return_type_is_too_small_scalar() {
; CHECK-LABEL: name: return_type_is_too_small_scalar
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
- ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
%reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" ()
ret i32 %reg
}
@@ -77,7 +85,6 @@ define ptr addrspace(3) @return_type_is_too_small_pointer() {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1 (%ir-block.0):
- ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
%reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}" ()
ret ptr addrspace(3) %reg
}
@@ -141,14 +148,13 @@ define void @use_scalar_too_small(i64 %arg) {
define void @use_scalar_too_big(i32 %arg) {
; CHECK-LABEL: name: use_scalar_too_big
- ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
- ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
- ; CHECK-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1
- ; CHECK-NEXT: SI_RETURN
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1 (%ir-block.0):
call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
new file mode 100644
index 00000000000000..723e0f2e7152d0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size-error.ll
@@ -0,0 +1,104 @@
+; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s 2>&1 | FileCheck -check-prefix=ERR %s
+
+; Diagnose register constraints that are not wide enough.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8v_def() {
+ %asm = call <9 x i32> asm sideeffect "; def $0", "={v[8:15]}"()
+ ret <9 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:15]}'
+define void @inline_asm_9xi32_in_8v_use(<9 x i32> %val) {
+ call void asm sideeffect "; use $0", "{v[8:15]}"(<9 x i32> %val)
+ ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:15]}'
+define <9 x i32> @inline_asm_9xi32_in_8s_def() {
+ %asm = call <9 x i32> asm sideeffect "; def $0", "={s[8:15]}"()
+ ret <9 x i32> %asm
+}
+
+
+; Diagnose register constraints that are too wide.
+
+; ERR: error: couldn't allocate output register for constraint '{v[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9v_def() {
+ %asm = call <8 x i32> asm sideeffect "; def $0", "={v[8:16]}"()
+ ret <8 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v[8:16]}'
+define void @inline_asm_8xi32_in_9v_use(<8 x i32> %val) {
+ call void asm sideeffect "; use $0", "{v[8:16]}"(<8 x i32> %val)
+ ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[8:16]}'
+define <8 x i32> @inline_asm_8xi32_in_9s_def() {
+ %asm = call <8 x i32> asm sideeffect "; def $0", "={s[8:16]}"()
+ ret <8 x i32> %asm
+}
+
+
+; Diagnose mismatched scalars with register ranges
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:5]}'
+define void @inline_asm_scalar_read_too_wide() {
+ %asm = call i32 asm sideeffect "; def $0 ", "={s[4:5]}"()
+ ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s[4:4]}'
+define void @inline_asm_scalar_read_too_narrow() {
+ %asm = call i64 asm sideeffect "; def $0 ", "={s[4:4]}"()
+ ret void
+}
+
+; Single registers for vector types that are too wide or too narrow should be
+; diagnosed.
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+define void @inline_asm_4xi32_in_v_use(<4 x i32> %val) {
+ call void asm sideeffect "; use $0", "{v8}"(<4 x i32> %val)
+ ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+define <4 x i32> @inline_asm_4xi32_in_v_def() {
+ %asm = call <4 x i32> asm sideeffect "; def $0", "={v8}"()
+ ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+define <4 x i32> @inline_asm_4xi32_in_s_def() {
+ %asm = call <4 x i32> asm sideeffect "; def $0", "={s8}"()
+ ret <4 x i32> %asm
+}
+
+; ERR: error: couldn't allocate input reg for constraint '{v8}'
+; ERR: error: couldn't allocate input reg for constraint 'v'
+define void @inline_asm_2xi8_in_v_use(<2 x i8> %val) {
+ call void asm sideeffect "; use $0", "{v8}"(<2 x i8> %val)
+ call void asm sideeffect "; use $0", "v"(<2 x i8> %val)
+ ret void
+}
+
+; ERR: error: couldn't allocate output register for constraint '{v8}'
+; ERR: error: couldn't allocate output register for constraint 'v'
+define <2 x i8> @inline_asm_2xi8_in_v_def() {
+ %phys = call <2 x i8> asm sideeffect "; def $0", "={v8}"()
+ %virt = call <2 x i8> asm sideeffect "; def $0", "=v"()
+ %r = and <2 x i8> %phys, %virt
+ ret <2 x i8> %r
+}
+
+; ERR: error: couldn't allocate output register for constraint '{s8}'
+; ERR: error: couldn't allocate output register for constraint 's'
+define <2 x i8> @inline_asm_2xi8_in_s_def() {
+ %phys = call <2 x i8> asm sideeffect "; def $0", "={s8}"()
+ %virt = call <2 x i8> asm sideeffect "; def $0", "=s"()
+ %r = and <2 x i8> %phys, %virt
+ ret <2 x i8> %r
+}
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
new file mode 100644
index 00000000000000..0edfaed688b8fc
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-mismatched-size.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=CHECK %s
+
+; Allow single registers that are too wide for the IR type:
+
+define i16 @inline_asm_i16_in_v_def() {
+; CHECK-LABEL: inline_asm_i16_in_v_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_and_b32_e32 v0, v8, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call i16 asm sideeffect "; def $0", "={v8}"()
+ %virt = call i16 asm sideeffect "; def $0", "=v"()
+ %r = and i16 %phys, %virt
+ ret i16 %r
+}
+
+define void @inline_asm_i16_in_v_use(i16 %val) {
+; CHECK-LABEL: inline_asm_i16_in_v_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v8, 0xffff, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ call void asm sideeffect "; use $0", "{v8}"(i16 %val)
+ call void asm sideeffect "; use $0", "v"(i16 %val)
+ ret void
+}
+
+define i16 @inline_asm_i16_in_s_def() {
+; CHECK-LABEL: inline_asm_i16_in_s_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def s8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def s4
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_and_b32 s4, s8, s4
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call i16 asm sideeffect "; def $0", "={s8}"()
+ %virt = call i16 asm sideeffect "; def $0", "=s"()
+ %r = and i16 %phys, %virt
+ ret i16 %r
+}
+
+define i8 @inline_asm_i8_in_v_def() {
+; CHECK-LABEL: inline_asm_i8_in_v_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, v8
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call i8 asm sideeffect "; def $0", "={v8}"()
+ ; %virt = call i8 asm sideeffect "; def $0", "=v"() ; currently fails
+ ; %r = and i8 %phys, %virt
+ ; ret i8 %r
+ ret i8 %phys
+}
+
+; currently broken, v8 should be set to v0 & 0xFF
+define void @inline_asm_i8_in_v_use(i8 %val) {
+; CHECK-LABEL: inline_asm_i8_in_v_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v8, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ call void asm sideeffect "; use $0", "{v8}"(i8 %val)
+ ; call void asm sideeffect "; use $0", "v"(i8 %val) ; currently fails
+ ret void
+}
+
+define i8 @inline_asm_i8_in_sphys_def() {
+; CHECK-LABEL: inline_asm_i8_in_sphys_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def s8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s8
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call i8 asm sideeffect "; def $0", "={s8}"()
+ ; %virt = call i8 asm sideeffect "; def $0", "=s"() ; currently fails
+ ; %r = and i8 %phys, %virt
+ ; ret i8 %r
+ ret i8 %phys
+}
+
+
+; Single registers for vector types that fit are fine.
+
+define void @inline_asm_2xi16_in_v_use(<2 x i16> %val) {
+; CHECK-LABEL: inline_asm_2xi16_in_v_use:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mov_b32_e32 v8, v0
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ call void asm sideeffect "; use $0", "{v8}"(<2 x i16> %val)
+ call void asm sideeffect "; use $0", "v"(<2 x i16> %val)
+ ret void
+}
+
+define <2 x i16> @inline_asm_2xi16_in_v_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_v_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_and_b32_e32 v0, v8, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call <2 x i16> asm sideeffect "; def $0", "={v8}"()
+ %virt = call <2 x i16> asm sideeffect "; def $0", "=v"()
+ %r = and <2 x i16> %phys, %virt
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @inline_asm_2xi16_in_s_def() {
+; CHECK-LABEL: inline_asm_2xi16_in_s_def:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def s8
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; def s4
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_and_b32 s4, s8, s4
+; CHECK-NEXT: v_mov_b32_e32 v0, s4
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %phys = call <2 x i16> asm sideeffect "; def $0", "={s8}"()
+ %virt = call <2 x i16> asm sideeffect "; def $0", "=s"()
+ %r = and <2 x i16> %phys, %virt
+ ret <2 x i16> %r
+}
More information about the llvm-commits
mailing list