[llvm] r334301 - [AMDGPU] Inline asm - added i16, half and i128 types support

Daniil Fukalov via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 8 09:29:04 PDT 2018


Author: dfukalov
Date: Fri Jun  8 09:29:04 2018
New Revision: 334301

URL: http://llvm.org/viewvc/llvm-project?rev=334301&view=rev
Log:
[AMDGPU] Inline asm - added i16, half and i128 types support

The AMDGPU inline assembler supports i16, half and i128 typed variables in constraints, but they were reported as errors.
Needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341,
e.g. to be able to load with global_load_dwordx4 into a 128-bit integer variable.

Differential Revision: https://reviews.llvm.org/D44920

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll
    llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll
    llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Fri Jun  8 09:29:04 2018
@@ -7779,11 +7779,11 @@ std::pair<unsigned, const TargetRegister
 SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
-  if (!isTypeLegal(VT))
-    return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
-
+  const TargetRegisterClass *RC = nullptr;
   if (Constraint.size() == 1) {
     switch (Constraint[0]) {
+    default:
+      return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
     case 's':
     case 'r':
       switch (VT.getSizeInBits()) {
@@ -7791,40 +7791,56 @@ SITargetLowering::getRegForInlineAsmCons
         return std::make_pair(0U, nullptr);
       case 32:
       case 16:
-        return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
+        RC = &AMDGPU::SReg_32_XM0RegClass;
+        break;
       case 64:
-        return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+        RC = &AMDGPU::SGPR_64RegClass;
+        break;
       case 128:
-        return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
+        RC = &AMDGPU::SReg_128RegClass;
+        break;
       case 256:
-        return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+        RC = &AMDGPU::SReg_256RegClass;
+        break;
       case 512:
-        return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
+        RC = &AMDGPU::SReg_512RegClass;
+        break;
       }
-
+      break;
     case 'v':
       switch (VT.getSizeInBits()) {
       default:
         return std::make_pair(0U, nullptr);
       case 32:
       case 16:
-        return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
+        RC = &AMDGPU::VGPR_32RegClass;
+        break;
       case 64:
-        return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
+        RC = &AMDGPU::VReg_64RegClass;
+        break;
       case 96:
-        return std::make_pair(0U, &AMDGPU::VReg_96RegClass);
+        RC = &AMDGPU::VReg_96RegClass;
+        break;
       case 128:
-        return std::make_pair(0U, &AMDGPU::VReg_128RegClass);
+        RC = &AMDGPU::VReg_128RegClass;
+        break;
       case 256:
-        return std::make_pair(0U, &AMDGPU::VReg_256RegClass);
+        RC = &AMDGPU::VReg_256RegClass;
+        break;
       case 512:
-        return std::make_pair(0U, &AMDGPU::VReg_512RegClass);
+        RC = &AMDGPU::VReg_512RegClass;
+        break;
       }
+      break;
     }
+    // We actually support i128, i16 and f16 as inline parameters
+    // even if they are not reported as legal
+    if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 ||
+               VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16))
+      return std::make_pair(0U, RC);
   }
 
   if (Constraint.size() > 1) {
-    const TargetRegisterClass *RC = nullptr;
     if (Constraint[1] == 'v') {
       RC = &AMDGPU::VGPR_32RegClass;
     } else if (Constraint[1] == 's') {

Modified: llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inline-constraints.ll Fri Jun  8 09:29:04 2018
@@ -4,20 +4,28 @@
 ; GCN-LABEL: {{^}}inline_reg_constraints:
 ; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
 ; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
 ; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 ; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 ; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
 
 define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) {
 entry:
   %v32 = tail call i32 asm sideeffect "flat_load_dword   $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
-  %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
-  %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
-  %s32 =  tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
-  %s64 =  tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
-  %s128 =  tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+  %v2_32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+  %v64 =   tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+  %v4_32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+  %v128 =  tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+  %s32 =   tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+  %s32_2 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+  %s64 =   tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+  %s4_32 =  tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+  %s128 =  tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
   %s256 =  tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
   ret void
 }

Modified: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-16.ll Fri Jun  8 09:29:04 2018
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
-; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}s_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
 define amdgpu_kernel void @s_input_output_i16() #0 {
   %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
   tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
@@ -12,8 +12,8 @@ define amdgpu_kernel void @s_input_outpu
 }
 
 ; GCN-LABEL: {{^}}v_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
 define amdgpu_kernel void @v_input_output_i16() #0 {
   %v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
   tail call void asm sideeffect "; use $0", "v"(i16 %v)
@@ -21,8 +21,8 @@ define amdgpu_kernel void @v_input_outpu
 }
 
 ; GCN-LABEL: {{^}}s_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
 define amdgpu_kernel void @s_input_output_f16() #0 {
   %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
   tail call void asm sideeffect "; use $0", "s"(half %v)
@@ -30,8 +30,8 @@ define amdgpu_kernel void @s_input_outpu
 }
 
 ; GCN-LABEL: {{^}}v_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
 define amdgpu_kernel void @v_input_output_f16() #0 {
   %v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
   tail call void asm sideeffect "; use $0", "v"(half %v)

Modified: llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll?rev=334301&r1=334300&r2=334301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll Fri Jun  8 09:29:04 2018
@@ -1,5 +1,6 @@
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
+; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
 
 ; GCN: error: couldn't allocate output register for constraint 's'
 ; GCN: error: couldn't allocate input reg for constraint 's'
@@ -19,32 +20,14 @@ define amdgpu_kernel void @v_input_outpu
 
 ; GCN: error: couldn't allocate output register for constraint 's'
 ; GCN: error: couldn't allocate input reg for constraint 's'
-define amdgpu_kernel void @s_input_output_i128() {
-  %v = tail call i128 asm sideeffect "s_mov_b32 $0, -1", "=s"()
-  tail call void asm sideeffect "; use $0", "s"(i128 %v)
-  ret void
-}
-
-; GCN: error: couldn't allocate output register for constraint 's'
-; GCN: error: couldn't allocate input reg for constraint 's'
 define amdgpu_kernel void @s_input_output_v8f16() {
   %v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
   tail call void asm sideeffect "; use $0", "s"(<8 x half> %v)
   ret void
 }
 
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_f16() {
-  %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"()
-  tail call void asm sideeffect "; use $0", "s"(half %v)
-  ret void
-}
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
 ; VI-NOT: error
 define amdgpu_kernel void @s_input_output_v2f16() {
   %v = tail call <2 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
@@ -52,8 +35,8 @@ define amdgpu_kernel void @s_input_outpu
   ret void
 }
 
-; CI: error: couldn't allocate output register for constraint 'v'
-; CI: error: couldn't allocate input reg for constraint 'v'
+; SICI: error: couldn't allocate output register for constraint 'v'
+; SICI: error: couldn't allocate input reg for constraint 'v'
 ; VI-NOT: error
 define amdgpu_kernel void @v_input_output_v2f16() {
   %v = tail call <2 x half> asm sideeffect "v_mov_b32 $0, -1", "=v"()
@@ -61,20 +44,8 @@ define amdgpu_kernel void @v_input_outpu
   ret void
 }
 
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_i16() {
-  %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
-  tail call void asm sideeffect "; use $0", "s"(i16 %v)
-  ret void
-}
-
-; FIXME: Should work on all targets?
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
 ; VI-NOT: error
 define amdgpu_kernel void @s_input_output_v2i16() {
   %v = tail call <2 x i16> asm sideeffect "s_mov_b32 $0, -1", "=s"()




More information about the llvm-commits mailing list