[llvm] r334189 - AMDGPU: Fix not including v2f64 in SReg_128
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 7 05:16:31 PDT 2018
Author: arsenm
Date: Thu Jun 7 05:16:31 2018
New Revision: 334189
URL: http://llvm.org/viewvc/llvm-project?rev=334189&view=rev
Log:
AMDGPU: Fix not including v2f64 in SReg_128
Fixes an assertion failure with calls returning v2f64.
Modified:
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/trunk/test/CodeGen/AMDGPU/call-return-types.ll
llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=334189&r1=334188&r2=334189&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Thu Jun 7 05:16:31 2018
@@ -404,7 +404,7 @@ def Pseudo_SReg_32 : RegisterClass<"AMDG
let CopyCost = -1;
}
-def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64], 32,
+def Pseudo_SReg_128 : RegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
(add PRIVATE_RSRC_REG)> {
let isAllocatable = 0;
let CopyCost = -1;
@@ -467,7 +467,7 @@ def TTMP_128 : RegisterClass<"AMDGPU", [
let isAllocatable = 0;
}
-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32,
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64, v2f64], 32,
(add SGPR_128, TTMP_128)> {
let AllocationPriority = 10;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/call-return-types.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/call-return-types.ll?rev=334189&r1=334188&r2=334189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/call-return-types.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/call-return-types.ll Thu Jun 7 05:16:31 2018
@@ -13,6 +13,8 @@ declare zeroext i8 @external_i8_zeroext_
declare signext i8 @external_i8_signext_func_void() #0
declare i16 @external_i16_func_void() #0
+declare <2 x i16> @external_v2i16_func_void() #0
+declare <4 x i16> @external_v4i16_func_void() #0
declare zeroext i16 @external_i16_zeroext_func_void() #0
declare signext i16 @external_i16_signext_func_void() #0
@@ -22,6 +24,10 @@ declare half @external_f16_func_void() #
declare float @external_f32_func_void() #0
declare double @external_f64_func_void() #0
+declare <2 x half> @external_v2f16_func_void() #0
+declare <4 x half> @external_v4f16_func_void() #0
+declare <2 x double> @external_v2f64_func_void() #0
+
declare <2 x i32> @external_v2i32_func_void() #0
declare <3 x i32> @external_v3i32_func_void() #0
declare <4 x i32> @external_v4i32_func_void() #0
@@ -30,8 +36,6 @@ declare <8 x i32> @external_v8i32_func_v
declare <16 x i32> @external_v16i32_func_void() #0
declare <32 x i32> @external_v32i32_func_void() #0
declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0
-declare <2 x i16> @external_v2i16_func_void() #0
-declare <2 x half> @external_v2f16_func_void() #0
declare { i32, i64 } @external_i32_i64_func_void() #0
@@ -152,6 +156,13 @@ define amdgpu_kernel void @test_call_ext
ret void
}
+; GCN-LABEL: {{^}}test_call_external_v2f64_func_void:
+define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 {
+ %val = call <2 x double> @external_v2f64_func_void()
+ store volatile <2 x double> %val, <2 x double> addrspace(1)* undef
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_v2i32_func_void:
define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 {
%val = call <2 x i32> @external_v2i32_func_void()
@@ -208,6 +219,13 @@ define amdgpu_kernel void @test_call_ext
ret void
}
+; GCN-LABEL: {{^}}test_call_external_v4i16_func_void:
+define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 {
+ %val = call <4 x i16> @external_v4i16_func_void()
+ store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_v2f16_func_void:
define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 {
%val = call <2 x half> @external_v2f16_func_void()
@@ -215,6 +233,13 @@ define amdgpu_kernel void @test_call_ext
ret void
}
+; GCN-LABEL: {{^}}test_call_external_v4f16_func_void:
+define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 {
+ %val = call <4 x half> @external_v4f16_func_void()
+ store volatile <4 x half> %val, <4 x half> addrspace(1)* undef
+ ret void
+}
+
; GCN-LABEL: {{^}}test_call_external_i32_i64_func_void:
define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 {
%val = call { i32, i64 } @external_i32_i64_func_void()
Modified: llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll?rev=334189&r1=334188&r2=334189&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/function-returns.ll Thu Jun 7 05:16:31 2018
@@ -121,6 +121,15 @@ define double @f64_func_void() #0 {
ret double %val
}
+; GCN-LABEL: {{^}}v2f64_func_void:
+; GCN: buffer_load_dwordx4 v[0:3], off
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_setpc_b64
+define <2 x double> @v2f64_func_void() #0 {
+ %val = load <2 x double>, <2 x double> addrspace(1)* undef
+ ret <2 x double> %val
+}
+
; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
@@ -300,6 +309,15 @@ define <4 x i16> @v4i16_func_void() #0 {
ret <4 x i16> %val
}
+; GCN-LABEL: {{^}}v4f16_func_void:
+; GFX9: buffer_load_dwordx2 v[0:1], off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64
+define <4 x half> @v4f16_func_void() #0 {
+ %val = load <4 x half>, <4 x half> addrspace(1)* undef
+ ret <4 x half> %val
+}
+
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
More information about the llvm-commits mailing list