[llvm] 2bd8e74 - [AMDGPU] Fix bitcast v4i64/v16i16

Mon Jul 11 13:32:24 PDT 2022

Author: Piotr Sobczak
Date: 2022-07-11T22:27:52+02:00
New Revision: 2bd8e74b94119b478ba36db336035234a9b834e6

URL: https://github.com/llvm/llvm-project/commit/2bd8e74b94119b478ba36db336035234a9b834e6
DIFF: https://github.com/llvm/llvm-project/commit/2bd8e74b94119b478ba36db336035234a9b834e6.diff

LOG: [AMDGPU] Fix bitcast v4i64/v16i16

Fix a regression introduced in D128865.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D129375

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/SIInstructions.td
    llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 094d5cd586739..d16da2a8b86bd 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -352,7 +352,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
   // TODO: Generalize to more vector types.
   setOperationAction({ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT},
                      {MVT::v2i16, MVT::v2f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
-                      MVT::v4i16, MVT::v4f16, MVT::v16i16, MVT::v16f16},
+                      MVT::v4i16, MVT::v4f16},
                      Custom);
 
   // Deal with vec3 vector operations when widened to vec4.

diff  --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 8972bce30dc68..ce8c03bb8d64d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1449,6 +1449,14 @@ def : BitConvert <v8i32, v16f16, VReg_256>;
 def : BitConvert <v8i32, v16i16, VReg_256>;
 def : BitConvert <v8f32, v16f16, VReg_256>;
 def : BitConvert <v8f32, v16i16, VReg_256>;
+def : BitConvert <v16f16, v4i64, VReg_256>;
+def : BitConvert <v16i16, v4i64, VReg_256>;
+def : BitConvert <v16f16, v4f64, VReg_256>;
+def : BitConvert <v16i16, v4f64, VReg_256>;
+def : BitConvert <v4i64, v16f16, VReg_256>;
+def : BitConvert <v4i64, v16i16, VReg_256>;
+def : BitConvert <v4f64, v16f16, VReg_256>;
+def : BitConvert <v4f64, v16i16, VReg_256>;
 
 // 512-bit bitcast
 def : BitConvert <v16i32, v16f32, VReg_512>;

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index 568e66b5756e4..4ae7193fea4fa 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -310,3 +310,75 @@ define amdgpu_kernel void @bitcast_f32_to_v1i32(i32 addrspace(1)* %out) {
   store i32 %v1, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: {{^}}bitcast_v4i64_to_v16i16:
+define amdgpu_kernel void @bitcast_v4i64_to_v16i16(i32 %cond, <16 x i16> addrspace(1)* %out, <4 x i64> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <4 x i64> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <4 x i64> %phi_value to <16 x i16>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <16 x i16> [zeroinitializer, %entry], [%cast, %if]
+  store <16 x i16> %phi_cast, <16 x i16> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v4f64_to_v16f16:
+define amdgpu_kernel void @bitcast_v4f64_to_v16f16(i32 %cond, <16 x half> addrspace(1)* %out, <4 x double> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <4 x double> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <4 x double> %phi_value to <16 x half>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <16 x half> [zeroinitializer, %entry], [%cast, %if]
+  store <16 x half> %phi_cast, <16 x half> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v16i16_to_v4i64:
+define amdgpu_kernel void @bitcast_v16i16_to_v4i64(i32 %cond, <4 x i64> addrspace(1)* %out, <16 x i16> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <16 x i16> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <16 x i16> %phi_value to <4 x i64>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <4 x i64> [zeroinitializer, %entry], [%cast, %if]
+  store <4 x i64> %phi_cast, <4 x i64> addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: {{^}}bitcast_v16f16_to_v4f64:
+define amdgpu_kernel void @bitcast_v16f16_to_v4f64(i32 %cond, <4 x double> addrspace(1)* %out, <16 x half> %value) {
+entry:
+  %cmp0 = icmp eq i32 %cond, 0
+  br i1 %cmp0, label %if, label %end
+
+if:
+  %phi_value = phi <16 x half> [zeroinitializer, %entry], [%value, %if]
+  %cast = bitcast <16 x half> %phi_value to <4 x double>
+  %cmp1 = icmp eq i32 %cond, 1
+  br i1 %cmp1, label %if, label %end
+
+end:
+  %phi_cast = phi <4 x double> [zeroinitializer, %entry], [%cast, %if]
+  store <4 x double> %phi_cast, <4 x double> addrspace(1)* %out
+  ret void
+}