[llvm] r334301 - [AMDGPU] Inline asm - added i16, half and i128 types support
Daniil Fukalov via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 8 09:29:04 PDT 2018
Author: dfukalov
Date: Fri Jun 8 09:29:04 2018
New Revision: 334301
URL: http://llvm.org/viewvc/llvm-project?rev=334301&view=rev
Log:
[AMDGPU] Inline asm - added i16, half and i128 types support
The AMDGPU inline assembler supports i16, half and i128 typed variables in constraints, but they were reported as errors.
This is needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341,
e.g. to be able to load with global_load_dwordx4 into a 128-bit integer variable.
Differential Revision: https://reviews.llvm.org/D44920
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll
llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll
llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jun 8 09:29:04 2018
@@ -7779,11 +7779,11 @@ std::pair<unsigned, const TargetRegister
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- if (!isTypeLegal(VT))
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
-
+ const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ default:
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
case 'r':
switch (VT.getSizeInBits()) {
@@ -7791,40 +7791,56 @@ SITargetLowering::getRegForInlineAsmCons
return std::make_pair(0U, nullptr);
case 32:
case 16:
- return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
+ RC = &AMDGPU::SReg_32_XM0RegClass;
+ break;
case 64:
- return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ RC = &AMDGPU::SGPR_64RegClass;
+ break;
case 128:
- return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
+ RC = &AMDGPU::SReg_128RegClass;
+ break;
case 256:
- return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ RC = &AMDGPU::SReg_256RegClass;
+ break;
case 512:
- return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
+ RC = &AMDGPU::SReg_512RegClass;
+ break;
}
-
+ break;
case 'v':
switch (VT.getSizeInBits()) {
default:
return std::make_pair(0U, nullptr);
case 32:
case 16:
- return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
+ RC = &AMDGPU::VGPR_32RegClass;
+ break;
case 64:
- return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
+ RC = &AMDGPU::VReg_64RegClass;
+ break;
case 96:
- return std::make_pair(0U, &AMDGPU::VReg_96RegClass);
+ RC = &AMDGPU::VReg_96RegClass;
+ break;
case 128:
- return std::make_pair(0U, &AMDGPU::VReg_128RegClass);
+ RC = &AMDGPU::VReg_128RegClass;
+ break;
case 256:
- return std::make_pair(0U, &AMDGPU::VReg_256RegClass);
+ RC = &AMDGPU::VReg_256RegClass;
+ break;
case 512:
- return std::make_pair(0U, &AMDGPU::VReg_512RegClass);
+ RC = &AMDGPU::VReg_512RegClass;
+ break;
}
+ break;
}
+ // We actually support i128, i16 and f16 as inline parameters
+ // even if they are not reported as legal
+ if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 ||
+ VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16))
+ return std::make_pair(0U, RC);
}
if (Constraint.size() > 1) {
- const TargetRegisterClass *RC = nullptr;
if (Constraint[1] == 'v') {
RC = &AMDGPU::VGPR_32RegClass;
} else if (Constraint[1] == 's') {
Modified: llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll Fri Jun 8 09:29:04 2018
@@ -4,20 +4,28 @@
; GCN-LABEL: {{^}}inline_reg_constraints:
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) {
entry:
%v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
- %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
- %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %v2_32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v64 = tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v4_32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v128 = tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s32_2 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s64 = tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s4_32 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s128 = tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
%s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
ret void
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll Fri Jun 8 09:29:04 2018
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
-; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}s_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
define amdgpu_kernel void @s_input_output_i16() #0 {
%v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
@@ -12,8 +12,8 @@ define amdgpu_kernel void @s_input_outpu
}
; GCN-LABEL: {{^}}v_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
define amdgpu_kernel void @v_input_output_i16() #0 {
%v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
tail call void asm sideeffect "; use $0", "v"(i16 %v)
@@ -21,8 +21,8 @@ define amdgpu_kernel void @v_input_outpu
}
; GCN-LABEL: {{^}}s_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
define amdgpu_kernel void @s_input_output_f16() #0 {
%v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
tail call void asm sideeffect "; use $0", "s"(half %v)
@@ -30,8 +30,8 @@ define amdgpu_kernel void @s_input_outpu
}
; GCN-LABEL: {{^}}v_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
define amdgpu_kernel void @v_input_output_f16() #0 {
%v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
tail call void asm sideeffect "; use $0", "v"(half %v)
Modified: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll Fri Jun 8 09:29:04 2018
@@ -1,5 +1,6 @@
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
+; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
; GCN: error: couldn't allocate output register for constraint 's'
; GCN: error: couldn't allocate input reg for constraint 's'
@@ -19,32 +20,14 @@ define amdgpu_kernel void @v_input_outpu
; GCN: error: couldn't allocate output register for constraint 's'
; GCN: error: couldn't allocate input reg for constraint 's'
-define amdgpu_kernel void @s_input_output_i128() {
- %v = tail call i128 asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(i128 %v)
- ret void
-}
-
-; GCN: error: couldn't allocate output register for constraint 's'
-; GCN: error: couldn't allocate input reg for constraint 's'
define amdgpu_kernel void @s_input_output_v8f16() {
%v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
tail call void asm sideeffect "; use $0", "s"(<8 x half> %v)
ret void
}
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_f16() {
- %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(half %v)
- ret void
-}
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
; VI-NOT: error
define amdgpu_kernel void @s_input_output_v2f16() {
%v = tail call <2 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
@@ -52,8 +35,8 @@ define amdgpu_kernel void @s_input_outpu
ret void
}
-; CI: error: couldn't allocate output register for constraint 'v'
-; CI: error: couldn't allocate input reg for constraint 'v'
+; SICI: error: couldn't allocate output register for constraint 'v'
+; SICI: error: couldn't allocate input reg for constraint 'v'
; VI-NOT: error
define amdgpu_kernel void @v_input_output_v2f16() {
%v = tail call <2 x half> asm sideeffect "v_mov_b32 $0, -1", "=v"()
@@ -61,20 +44,8 @@ define amdgpu_kernel void @v_input_outpu
ret void
}
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_i16() {
- %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(i16 %v)
- ret void
-}
-
-; FIXME: Should work on all targets?
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
; VI-NOT: error
define amdgpu_kernel void @s_input_output_v2i16() {
%v = tail call <2 x i16> asm sideeffect "s_mov_b32 $0, -1", "=s"()
More information about the llvm-commits
mailing list