[llvm] 50f3bb1 - [AMDGPU] Fixed selection error for 64 bit extract_subvector

Mon May 18 14:18:10 PDT 2020

Author: Stanislav Mekhanoshin
Date: 2020-05-18T14:17:59-07:00
New Revision: 50f3bb13297b291b53abe3437075784c714b1a03

URL: https://github.com/llvm/llvm-project/commit/50f3bb13297b291b53abe3437075784c714b1a03
DIFF: https://github.com/llvm/llvm-project/commit/50f3bb13297b291b53abe3437075784c714b1a03.diff

LOG: [AMDGPU] Fixed selection error for 64 bit extract_subvector

Differential Revision: https://reviews.llvm.org/D80155

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/extract-subvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 54556d100fc2..b453098b682b 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -348,6 +348,14 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16f64, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i64, Custom);
 
   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);

diff  --git a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
index 19eb67569913..0285d18252b0 100644
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@@ -1,26 +1,26 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck %s
-
-; CHECK-LABEL: foo
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: buffer_load_ushort
-; CHECK: v_bfe_i32
-; CHECK: v_bfe_i32
-
-define <2 x i16> @foo(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: extract_2xi16
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: buffer_load_ushort
+; GCN: v_bfe_i32
+; GCN: v_bfe_i32
+
+define <2 x i16> @extract_2xi16(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
   br i1 undef, label %T, label %F
 
 T:
@@ -38,3 +38,129 @@ exit:
   %r2 = select <2 x i1> %b2, <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> <i16 -1, i16 -1>
   ret <2 x i16> %r2
 }
+
+; GCN-LABEL: extract_2xi64
+; GCN-COUNT-2: v_cndmask_b32
+define <2 x i64> @extract_2xi64(<8 x i64> addrspace(1) * %p0, <8 x i64> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %b2 = icmp sgt <2 x i64> %v2, <i64 -1, i64 -1>
+  %r2 = select <2 x i1> %b2, <2 x i64> <i64 -32768, i64 -32768>, <2 x i64> <i64 -1, i64 -1>
+  ret <2 x i64> %r2
+}
+
+; GCN-LABEL: extract_4xi64
+; GCN-COUNT-4: v_cndmask_b32
+define <4 x i64> @extract_4xi64(<8 x i64> addrspace(1) * %p0, <8 x i64> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i64>, <8 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %b2 = icmp sgt <4 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %r2 = select <4 x i1> %b2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  ret <4 x i64> %r2
+}
+
+; GCN-LABEL: extract_8xi64
+; GCN-COUNT-8: v_cndmask_b32
+define <8 x i64> @extract_8xi64(<16 x i64> addrspace(1) * %p0, <16 x i64> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <16 x i64>, <16 x i64> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <16 x i64>, <16 x i64> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %b2 = icmp sgt <8 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  %r2 = select <8 x i1> %b2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  ret <8 x i64> %r2
+}
+
+; GCN-LABEL: extract_2xf64
+; GCN-COUNT-2: v_cndmask_b32
+define <2 x double> @extract_2xf64(<8 x double> addrspace(1) * %p0, <8 x double> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x double>, <8 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x double>, <8 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+  %b2 = fcmp ogt <2 x double> %v2, <double -1.0, double -1.0>
+  %r2 = select <2 x i1> %b2, <2 x double> <double -2.0, double -2.0>, <2 x double> <double -1.0, double -1.0>
+  ret <2 x double> %r2
+}
+
+; GCN-LABEL: extract_4xf64
+; GCN-COUNT-4: v_cndmask_b32
+define <4 x double> @extract_4xf64(<8 x double> addrspace(1) * %p0, <8 x double> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x double>, <8 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x double>, <8 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %b2 = fcmp ogt <4 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0>
+  %r2 = select <4 x i1> %b2, <4 x double> <double -2.0, double -2.0, double -2.0, double -2.0>, <4 x double> <double -1.0, double -1.0, double -1.0, double -1.0>
+  ret <4 x double> %r2
+}
+
+; GCN-LABEL: extract_8xf64
+; GCN-COUNT-8: v_cndmask_b32
+define <8 x double> @extract_8xf64(<16 x double> addrspace(1) * %p0, <16 x double> addrspace(1) * %p1) {
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <16 x double>, <16 x double> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <16 x double>, <16 x double> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <16 x double> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %b2 = fcmp ogt <8 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
+  %r2 = select <8 x i1> %b2, <8 x double> <double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0>, <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
+  ret <8 x double> %r2
+}