[llvm] r369565 - [MVT] Add v16f16 and v32f16 vectors.

Wed Aug 21 12:14:48 PDT 2019

Author: ctopper
Date: Wed Aug 21 12:14:48 2019
New Revision: 369565

URL: http://llvm.org/viewvc/llvm-project?rev=369565&view=rev
Log:
[MVT] Add v16f16 and v32f16 vectors.

I might look at improving PR43065, which will require being
able to mark 256-bit and 512-bit vectors of f16 as Legal.

Differential Revision: https://reviews.llvm.org/D66515

Modified:
    llvm/trunk/include/llvm/CodeGen/ValueTypes.td
    llvm/trunk/include/llvm/Support/MachineValueType.h
    llvm/trunk/lib/CodeGen/ValueTypes.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Modified: llvm/trunk/include/llvm/CodeGen/ValueTypes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ValueTypes.td?rev=369565&r1=369564&r2=369565&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/ValueTypes.td (original)
+++ llvm/trunk/include/llvm/CodeGen/ValueTypes.td Wed Aug 21 12:14:48 2019
@@ -126,43 +126,45 @@ def v2f16    : ValueType<32 , 93>;    //
 def v3f16    : ValueType<48 , 94>;    //    3 x f16 vector value
 def v4f16    : ValueType<64 , 95>;    //    4 x f16 vector value
 def v8f16    : ValueType<128, 96>;    //    8 x f16 vector value
-def v1f32    : ValueType<32 , 97>;    //    1 x f32 vector value
-def v2f32    : ValueType<64 , 98>;    //    2 x f32 vector value
-def v3f32    : ValueType<96 , 99>;    //    3 x f32 vector value
-def v4f32    : ValueType<128, 100>;   //    4 x f32 vector value
-def v5f32    : ValueType<160, 101>;   //    5 x f32 vector value
-def v8f32    : ValueType<256, 102>;   //    8 x f32 vector value
-def v16f32   : ValueType<512,  103>;  //   16 x f32 vector value
-def v32f32   : ValueType<1024, 104>;  //   32 x f32 vector value
-def v64f32   : ValueType<2048, 105>;  //   64 x f32 vector value
-def v128f32  : ValueType<4096, 106>;  //  128 x f32 vector value
-def v256f32  : ValueType<8182, 107>;  //  256 x f32 vector value
-def v512f32  : ValueType<16384, 108>; //  512 x f32 vector value
-def v1024f32 : ValueType<32768, 109>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 110>; // 2048 x f32 vector value
-def v1f64    : ValueType<64, 111>;    //    1 x f64 vector value
-def v2f64    : ValueType<128, 112>;   //    2 x f64 vector value
-def v4f64    : ValueType<256, 113>;   //    4 x f64 vector value
-def v8f64    : ValueType<512, 114>;   //    8 x f64 vector value
+def v16f16   : ValueType<256, 97>;    //   16 x f16 vector value
+def v32f16   : ValueType<512, 98>;    //   32 x f16 vector value
+def v1f32    : ValueType<32 , 99>;    //    1 x f32 vector value
+def v2f32    : ValueType<64 , 100>;   //    2 x f32 vector value
+def v3f32    : ValueType<96 , 101>;   //    3 x f32 vector value
+def v4f32    : ValueType<128, 102>;   //    4 x f32 vector value
+def v5f32    : ValueType<160, 103>;   //    5 x f32 vector value
+def v8f32    : ValueType<256, 104>;   //    8 x f32 vector value
+def v16f32   : ValueType<512,  105>;  //   16 x f32 vector value
+def v32f32   : ValueType<1024, 106>;  //   32 x f32 vector value
+def v64f32   : ValueType<2048, 107>;  //   64 x f32 vector value
+def v128f32  : ValueType<4096, 108>;  //  128 x f32 vector value
+def v256f32  : ValueType<8182, 109>;  //  256 x f32 vector value
+def v512f32  : ValueType<16384, 110>; //  512 x f32 vector value
+def v1024f32 : ValueType<32768, 111>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 112>; // 2048 x f32 vector value
+def v1f64    : ValueType<64, 113>;    //    1 x f64 vector value
+def v2f64    : ValueType<128, 114>;   //    2 x f64 vector value
+def v4f64    : ValueType<256, 115>;   //    4 x f64 vector value
+def v8f64    : ValueType<512, 116>;   //    8 x f64 vector value
 
-def nxv2f16  : ValueType<32 , 115>; // n x  2 x f16 vector value
-def nxv4f16  : ValueType<64 , 116>; // n x  4 x f16 vector value
-def nxv8f16  : ValueType<128, 117>; // n x  8 x f16 vector value
-def nxv1f32  : ValueType<32 , 118>; // n x  1 x f32 vector value
-def nxv2f32  : ValueType<64 , 119>; // n x  2 x f32 vector value
-def nxv4f32  : ValueType<128, 120>; // n x  4 x f32 vector value
-def nxv8f32  : ValueType<256, 121>; // n x  8 x f32 vector value
-def nxv16f32 : ValueType<512, 122>; // n x 16 x f32 vector value
-def nxv1f64  : ValueType<64,  123>; // n x  1 x f64 vector value
-def nxv2f64  : ValueType<128, 124>; // n x  2 x f64 vector value
-def nxv4f64  : ValueType<256, 125>; // n x  4 x f64 vector value
-def nxv8f64  : ValueType<512, 126>; // n x  8 x f64 vector value
+def nxv2f16  : ValueType<32 , 117>; // n x  2 x f16 vector value
+def nxv4f16  : ValueType<64 , 118>; // n x  4 x f16 vector value
+def nxv8f16  : ValueType<128, 119>; // n x  8 x f16 vector value
+def nxv1f32  : ValueType<32 , 120>; // n x  1 x f32 vector value
+def nxv2f32  : ValueType<64 , 121>; // n x  2 x f32 vector value
+def nxv4f32  : ValueType<128, 122>; // n x  4 x f32 vector value
+def nxv8f32  : ValueType<256, 123>; // n x  8 x f32 vector value
+def nxv16f32 : ValueType<512, 124>; // n x 16 x f32 vector value
+def nxv1f64  : ValueType<64,  125>; // n x  1 x f64 vector value
+def nxv2f64  : ValueType<128, 126>; // n x  2 x f64 vector value
+def nxv4f64  : ValueType<256, 127>; // n x  4 x f64 vector value
+def nxv8f64  : ValueType<512, 128>; // n x  8 x f64 vector value
 
-def x86mmx : ValueType<64 , 127>;   // X86 MMX value
-def FlagVT : ValueType<0  , 128>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 129>;   // Produces no value
-def untyped: ValueType<8  , 130>;   // Produces an untyped value
-def exnref: ValueType<0, 131>;      // WebAssembly's exnref type
+def x86mmx : ValueType<64 , 129>;   // X86 MMX value
+def FlagVT : ValueType<0  , 130>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 131>;   // Produces no value
+def untyped: ValueType<8  , 132>;   // Produces an untyped value
+def exnref: ValueType<0, 133>;      // WebAssembly's exnref type
 def token  : ValueType<0  , 248>;   // TokenTy
 def MetadataVT: ValueType<0, 249>;  // Metadata
 

Modified: llvm/trunk/include/llvm/Support/MachineValueType.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MachineValueType.h?rev=369565&r1=369564&r2=369565&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/MachineValueType.h (original)
+++ llvm/trunk/include/llvm/Support/MachineValueType.h Wed Aug 21 12:14:48 2019
@@ -158,37 +158,39 @@ namespace llvm {
       v3f16          =  94,   //    3 x f16
       v4f16          =  95,   //    4 x f16
       v8f16          =  96,   //    8 x f16
-      v1f32          =  97,   //    1 x f32
-      v2f32          =  98,   //    2 x f32
-      v3f32          =  99,   //    3 x f32
-      v4f32          =  100,  //    4 x f32
-      v5f32          =  101,  //    5 x f32
-      v8f32          =  102,  //    8 x f32
-      v16f32         =  103,  //   16 x f32
-      v32f32         =  104,  //   32 x f32
-      v64f32         =  105,  //   64 x f32
-      v128f32        =  106,  //  128 x f32
-      v256f32        =  107,  //  256 x f32
-      v512f32        =  108,  //  512 x f32
-      v1024f32       =  109,  // 1024 x f32
-      v2048f32       =  110,  // 2048 x f32
-      v1f64          =  111,  //    1 x f64
-      v2f64          =  112,  //    2 x f64
-      v4f64          =  113,  //    4 x f64
-      v8f64          =  114,  //    8 x f64
-
-      nxv2f16        =  115,  // n x  2 x f16
-      nxv4f16        =  116,  // n x  4 x f16
-      nxv8f16        =  117,  // n x  8 x f16
-      nxv1f32        =  118,  // n x  1 x f32
-      nxv2f32        =  119,  // n x  2 x f32
-      nxv4f32        =  120,  // n x  4 x f32
-      nxv8f32        =  121,  // n x  8 x f32
-      nxv16f32       =  122,  // n x 16 x f32
-      nxv1f64        =  123,  // n x  1 x f64
-      nxv2f64        =  124,  // n x  2 x f64
-      nxv4f64        =  125,  // n x  4 x f64
-      nxv8f64        =  126,  // n x  8 x f64
+      v16f16         =  97,   //   16 x f16
+      v32f16         =  98,   //   32 x f16
+      v1f32          =  99,   //    1 x f32
+      v2f32          =  100,  //    2 x f32
+      v3f32          =  101,  //    3 x f32
+      v4f32          =  102,  //    4 x f32
+      v5f32          =  103,  //    5 x f32
+      v8f32          =  104,  //    8 x f32
+      v16f32         =  105,  //   16 x f32
+      v32f32         =  106,  //   32 x f32
+      v64f32         =  107,  //   64 x f32
+      v128f32        =  108,  //  128 x f32
+      v256f32        =  109,  //  256 x f32
+      v512f32        =  110,  //  512 x f32
+      v1024f32       =  111,  // 1024 x f32
+      v2048f32       =  112,  // 2048 x f32
+      v1f64          =  113,  //    1 x f64
+      v2f64          =  114,  //    2 x f64
+      v4f64          =  115,  //    4 x f64
+      v8f64          =  116,  //    8 x f64
+
+      nxv2f16        =  117,  // n x  2 x f16
+      nxv4f16        =  118,  // n x  4 x f16
+      nxv8f16        =  119,  // n x  8 x f16
+      nxv1f32        =  120,  // n x  1 x f32
+      nxv2f32        =  121,  // n x  2 x f32
+      nxv4f32        =  122,  // n x  4 x f32
+      nxv8f32        =  123,  // n x  8 x f32
+      nxv16f32       =  124,  // n x 16 x f32
+      nxv1f64        =  125,  // n x  1 x f64
+      nxv2f64        =  126,  // n x  2 x f64
+      nxv4f64        =  127,  // n x  4 x f64
+      nxv8f64        =  128,  // n x  8 x f64
 
       FIRST_FP_VECTOR_VALUETYPE = v2f16,
       LAST_FP_VECTOR_VALUETYPE = nxv8f64,
@@ -199,20 +201,20 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v1i1,
       LAST_VECTOR_VALUETYPE  = nxv8f64,
 
-      x86mmx         =  127,   // This is an X86 MMX value
+      x86mmx         =  129,   // This is an X86 MMX value
 
-      Glue           =  128,   // This glues nodes together during pre-RA sched
+      Glue           =  130,   // This glues nodes together during pre-RA sched
 
-      isVoid         =  129,   // This has no value
+      isVoid         =  131,   // This has no value
 
-      Untyped        =  130,   // This value takes a register, but has
+      Untyped        =  132,   // This value takes a register, but has
                                // unspecified type.  The register class
                                // will be determined by the opcode.
 
-      exnref         =  131,   // WebAssembly's exnref type
+      exnref         =  133,   // WebAssembly's exnref type
 
       FIRST_VALUETYPE = 1,     // This is always the beginning of the list.
-      LAST_VALUETYPE =  132,   // This always remains at the end of the list.
+      LAST_VALUETYPE =  134,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
       // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -341,17 +343,18 @@ namespace llvm {
 
     /// Return true if this is a 256-bit vector type.
     bool is256BitVector() const {
-      return (SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64  ||
-              SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
-              SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64);
+      return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
+              SimpleTy == MVT::v4f64  || SimpleTy == MVT::v32i8 ||
+              SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
+              SimpleTy == MVT::v4i64);
     }
 
     /// Return true if this is a 512-bit vector type.
     bool is512BitVector() const {
-      return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64  ||
-              SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8  ||
-              SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
-              SimpleTy == MVT::v8i64);
+      return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
+              SimpleTy == MVT::v8f64  || SimpleTy == MVT::v512i1 ||
+              SimpleTy == MVT::v64i8  || SimpleTy == MVT::v32i16 ||
+              SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
     }
 
     /// Return true if this is a 1024-bit vector type.
@@ -483,6 +486,8 @@ namespace llvm {
       case v3f16:
       case v4f16:
       case v8f16:
+      case v16f16:
+      case v32f16:
       case nxv2f16:
       case nxv4f16:
       case nxv8f16: return f16;
@@ -546,6 +551,7 @@ namespace llvm {
       case v32i16:
       case v32i32:
       case v32i64:
+      case v32f16:
       case v32f32:
       case nxv32i1:
       case nxv32i8:
@@ -557,6 +563,7 @@ namespace llvm {
       case v16i16:
       case v16i32:
       case v16i64:
+      case v16f16:
       case v16f32:
       case nxv16i1:
       case nxv16i8:
@@ -741,6 +748,7 @@ namespace llvm {
       case v16i16:
       case v8i32:
       case v4i64:
+      case v16f16:
       case v8f32:
       case v4f64:
       case nxv32i8:
@@ -754,6 +762,7 @@ namespace llvm {
       case v32i16:
       case v16i32:
       case v8i64:
+      case v32f16:
       case v16f32:
       case v8f64:
       case nxv32i16:
@@ -931,6 +940,8 @@ namespace llvm {
         if (NumElements == 3)  return MVT::v3f16;
         if (NumElements == 4)  return MVT::v4f16;
         if (NumElements == 8)  return MVT::v8f16;
+        if (NumElements == 16) return MVT::v16f16;
+        if (NumElements == 32) return MVT::v32f16;
         break;
       case MVT::f32:
         if (NumElements == 1)    return MVT::v1f32;

Modified: llvm/trunk/lib/CodeGen/ValueTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ValueTypes.cpp?rev=369565&r1=369564&r2=369565&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/ValueTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/ValueTypes.cpp Wed Aug 21 12:14:48 2019
@@ -330,6 +330,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Co
   case MVT::v3f16:   return VectorType::get(Type::getHalfTy(Context), 3);
   case MVT::v4f16:   return VectorType::get(Type::getHalfTy(Context), 4);
   case MVT::v8f16:   return VectorType::get(Type::getHalfTy(Context), 8);
+  case MVT::v16f16:  return VectorType::get(Type::getHalfTy(Context), 16);
+  case MVT::v32f16:  return VectorType::get(Type::getHalfTy(Context), 32);
   case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
   case MVT::v3f32:   return VectorType::get(Type::getFloatTy(Context), 3);

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=369565&r1=369564&r2=369565&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Aug 21 12:14:48 2019
@@ -157,6 +157,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
+  setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
 
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
@@ -219,6 +221,8 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
   setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
   setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
   setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
+  setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
+  setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
 
   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);