[llvm] [MVT] Expand the MVT enum to allow more types (PR #69603)
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 06:11:38 PDT 2023
https://github.com/nemanjai created https://github.com/llvm/llvm-project/pull/69603
The MVT enum can contain up to 256 values since its underlying type is uint8_t. There are currently 197 MVTs, and the last few are reserved for various purposes. Furthermore, the current implementation has a hidden limit of 223 for newly added types, because TableGen treats whatever type has its "Value" field set to 223 as the last MVT, so all the tables for the type legalizer end up with 224 entries. This effectively limits the number of MVTs that can be added (in tree or out of tree) to 27, a limit that can easily be hit with all the new vector types that may need to be added.
This patch allows the total number of MVTs to grow to 2^16 without incurring an allocation/deallocation penalty, since the total size is capped at the smallest multiple of 64 that can contain all the defined MVTs.
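To make the sizing rule concrete, here is a minimal standalone sketch of the computation (not the generated header; the helper name roundUpToMultipleOf64 and the counts in the asserts are only illustrative):

// Mirrors MAX_ALLOWED_VALUETYPE = 64 * ((VALUETYPE_SIZE + 63) / 64) from the
// patch: round the number of defined MVTs up to the next multiple of 64.
constexpr unsigned roundUpToMultipleOf64(unsigned NumValueTypes) {
  return 64 * ((NumValueTypes + 63) / 64);
}

// With roughly 200 legalizer-relevant types defined today the cap lands at
// 256, and it only grows (in 64-entry steps) once more types are defined.
static_assert(roundUpToMultipleOf64(198) == 256, "");
static_assert(roundUpToMultipleOf64(256) == 256, "");
static_assert(roundUpToMultipleOf64(257) == 320, "");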
>From 254e223cde274aca1af8fa7476213a5d382b3f34 Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanjaivanovic at Nemanjas-MacBook-Air.local>
Date: Thu, 19 Oct 2023 14:56:57 +0200
Subject: [PATCH] [MVT] Expand the MVT enum to allow more types
The MVT enum can contain up to 256 values since its underlying
type is uint8_t. There are currently 197 MVTs and the last few are
reserved for various purposes. Furthermore, the current
implementation has a hidden limit of 223 for newly added types,
because TableGen treats whatever type has its "Value" field
set to 223 as the last MVT. Then all the tables for the type
legalizer have 224 entries. This effectively limits the
number of MVTs that can be added (in tree or out of tree) to
27. This limit can easily be hit with all the new vector types
that may need to be added.
This patch allows the total number of MVTs to grow to 2^16
without incurring an allocation/deallocation penalty, since
the total size is capped at the smallest multiple of
64 that can contain all the defined MVTs.
---
llvm/include/llvm/CodeGen/MachineValueType.h | 6 +-
llvm/include/llvm/CodeGen/ValueTypes.td | 517 ++++++++++---------
llvm/utils/TableGen/CodeGenDAGPatterns.h | 11 +-
llvm/utils/TableGen/VTEmitter.cpp | 12 +-
4 files changed, 292 insertions(+), 254 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineValueType.h b/llvm/include/llvm/CodeGen/MachineValueType.h
index f0d380fa9cda20f..ad6f093456d46cd 100644
--- a/llvm/include/llvm/CodeGen/MachineValueType.h
+++ b/llvm/include/llvm/CodeGen/MachineValueType.h
@@ -33,7 +33,7 @@ namespace llvm {
/// type can be represented by an MVT.
class MVT {
public:
- enum SimpleValueType : uint8_t {
+ enum SimpleValueType : uint16_t {
// Simple value types that aren't explicitly part of this enumeration
// are considered extended value types.
INVALID_SIMPLE_VALUE_TYPE = 0,
@@ -48,8 +48,8 @@ namespace llvm {
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
- // This value must be a multiple of 32.
- MAX_ALLOWED_VALUETYPE = 224,
+ // This value must be a multiple of 64.
+ MAX_ALLOWED_VALUETYPE = 64 * ((VALUETYPE_SIZE + 63) / 64),
};
static_assert(FIRST_VALUETYPE > 0);
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 25f0d385259db49..6538ce559315d9b 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -24,6 +24,7 @@ class ValueType<int size, int value> {
int isFP = false;
int isVector = false;
int isScalable = false;
+ bit ForLegalizer = true;
}
class VTAny<int value> : ValueType<0, value> {
@@ -54,261 +55,292 @@ class VTScalableVec<int nelem, ValueType elt, int value>
let isScalable = true;
}
+// Convenience classes allowing ordered Value definitions, so that new
+// additions in the middle do not require updating the Values of all
+// subsequent VTs.
+class VTIntOrd<int size, ValueType prev> : VTInt<size, 0> {
+ let Value = !add(prev.Value, 1);
+}
+
+class VTFPOrd<int size, ValueType prev> : VTFP<size, 0> {
+ let Value = !add(prev.Value, 1);
+}
+class VTVecOrd<int nelem, ValueType elt, ValueType prev> :
+ VTVec<nelem, elt, 0> {
+ let Value = !add(prev.Value, 1);
+}
+class VTScalVecOrd<int nelem, ValueType elt, ValueType prev> :
+ VTScalableVec<nelem, elt, 0> {
+ let Value = !add(prev.Value, 1);
+}
+class VTOrd<int size, ValueType prev> : ValueType<size, 0> {
+ let Value = !add(prev.Value, 1);
+}
+class VTAnyOrd<ValueType prev> : VTAny<0> {
+ let Value = !add(prev.Value, 1);
+}
+
defset list<ValueType> ValueTypes = {
def OtherVT : ValueType<0, 1> { // "Other" value
let LLVMName = "Other";
}
-def i1 : VTInt<1, 2>; // One bit boolean value
-def i2 : VTInt<2, 3>; // 2-bit integer value
-def i4 : VTInt<4, 4>; // 4-bit integer value
-def i8 : VTInt<8, 5>; // 8-bit integer value
-def i16 : VTInt<16, 6>; // 16-bit integer value
-def i32 : VTInt<32, 7>; // 32-bit integer value
-def i64 : VTInt<64, 8>; // 64-bit integer value
-def i128 : VTInt<128, 9>; // 128-bit integer value
-
-def bf16 : VTFP<16, 10>; // 16-bit brain floating point value
-def f16 : VTFP<16, 11>; // 16-bit floating point value
-def f32 : VTFP<32, 12>; // 32-bit floating point value
-def f64 : VTFP<64, 13>; // 64-bit floating point value
-def f80 : VTFP<80, 14>; // 80-bit floating point value
-def f128 : VTFP<128, 15>; // 128-bit floating point value
-def ppcf128 : VTFP<128, 16>; // PPC 128-bit floating point value
-
-def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value
-def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value
-def v4i1 : VTVec<4, i1, 19>; // 4 x i1 vector value
-def v8i1 : VTVec<8, i1, 20>; // 8 x i1 vector value
-def v16i1 : VTVec<16, i1, 21>; // 16 x i1 vector value
-def v32i1 : VTVec<32, i1, 22>; // 32 x i1 vector value
-def v64i1 : VTVec<64, i1, 23>; // 64 x i1 vector value
-def v128i1 : VTVec<128, i1, 24>; // 128 x i1 vector value
-def v256i1 : VTVec<256, i1, 25>; // 256 x i1 vector value
-def v512i1 : VTVec<512, i1, 26>; // 512 x i1 vector value
-def v1024i1 : VTVec<1024, i1, 27>; // 1024 x i1 vector value
-def v2048i1 : VTVec<2048, i1, 28>; // 2048 x i1 vector value
-
-def v128i2 : VTVec<128, i2, 29>; // 128 x i2 vector value
-def v256i2 : VTVec<256, i2, 30>; // 256 x i2 vector value
-
-def v64i4 : VTVec<64, i4, 31>; // 64 x i4 vector value
-def v128i4 : VTVec<128, i4, 32>; // 128 x i4 vector value
-
-def v1i8 : VTVec<1, i8, 33>; // 1 x i8 vector value
-def v2i8 : VTVec<2, i8, 34>; // 2 x i8 vector value
-def v4i8 : VTVec<4, i8, 35>; // 4 x i8 vector value
-def v8i8 : VTVec<8, i8, 36>; // 8 x i8 vector value
-def v16i8 : VTVec<16, i8, 37>; // 16 x i8 vector value
-def v32i8 : VTVec<32, i8, 38>; // 32 x i8 vector value
-def v64i8 : VTVec<64, i8, 39>; // 64 x i8 vector value
-def v128i8 : VTVec<128, i8, 40>; // 128 x i8 vector value
-def v256i8 : VTVec<256, i8, 41>; // 256 x i8 vector value
-def v512i8 : VTVec<512, i8, 42>; // 512 x i8 vector value
-def v1024i8 : VTVec<1024, i8, 43>; // 1024 x i8 vector value
-
-def v1i16 : VTVec<1, i16, 44>; // 1 x i16 vector value
-def v2i16 : VTVec<2, i16, 45>; // 2 x i16 vector value
-def v3i16 : VTVec<3, i16, 46>; // 3 x i16 vector value
-def v4i16 : VTVec<4, i16, 47>; // 4 x i16 vector value
-def v8i16 : VTVec<8, i16, 48>; // 8 x i16 vector value
-def v16i16 : VTVec<16, i16, 49>; // 16 x i16 vector value
-def v32i16 : VTVec<32, i16, 50>; // 32 x i16 vector value
-def v64i16 : VTVec<64, i16, 51>; // 64 x i16 vector value
-def v128i16 : VTVec<128, i16, 52>; // 128 x i16 vector value
-def v256i16 : VTVec<256, i16, 53>; // 256 x i16 vector value
-def v512i16 : VTVec<512, i16, 54>; // 512 x i16 vector value
-
-def v1i32 : VTVec<1, i32, 55>; // 1 x i32 vector value
-def v2i32 : VTVec<2, i32, 56>; // 2 x i32 vector value
-def v3i32 : VTVec<3, i32, 57>; // 3 x i32 vector value
-def v4i32 : VTVec<4, i32, 58>; // 4 x i32 vector value
-def v5i32 : VTVec<5, i32, 59>; // 5 x i32 vector value
-def v6i32 : VTVec<6, i32, 60>; // 6 x f32 vector value
-def v7i32 : VTVec<7, i32, 61>; // 7 x f32 vector value
-def v8i32 : VTVec<8, i32, 62>; // 8 x i32 vector value
-def v9i32 : VTVec<9, i32, 63>; // 9 x i32 vector value
-def v10i32 : VTVec<10, i32, 64>; // 10 x i32 vector value
-def v11i32 : VTVec<11, i32, 65>; // 11 x i32 vector value
-def v12i32 : VTVec<12, i32, 66>; // 12 x i32 vector value
-def v16i32 : VTVec<16, i32, 67>; // 16 x i32 vector value
-def v32i32 : VTVec<32, i32, 68>; // 32 x i32 vector value
-def v64i32 : VTVec<64, i32, 69>; // 64 x i32 vector value
-def v128i32 : VTVec<128, i32, 70>; // 128 x i32 vector value
-def v256i32 : VTVec<256, i32, 71>; // 256 x i32 vector value
-def v512i32 : VTVec<512, i32, 72>; // 512 x i32 vector value
-def v1024i32 : VTVec<1024, i32, 73>; // 1024 x i32 vector value
-def v2048i32 : VTVec<2048, i32, 74>; // 2048 x i32 vector value
-
-def v1i64 : VTVec<1, i64, 75>; // 1 x i64 vector value
-def v2i64 : VTVec<2, i64, 76>; // 2 x i64 vector value
-def v3i64 : VTVec<3, i64, 77>; // 3 x i64 vector value
-def v4i64 : VTVec<4, i64, 78>; // 4 x i64 vector value
-def v8i64 : VTVec<8, i64, 79>; // 8 x i64 vector value
-def v16i64 : VTVec<16, i64, 80>; // 16 x i64 vector value
-def v32i64 : VTVec<32, i64, 81>; // 32 x i64 vector value
-def v64i64 : VTVec<64, i64, 82>; // 64 x i64 vector value
-def v128i64 : VTVec<128, i64, 83>; // 128 x i64 vector value
-def v256i64 : VTVec<256, i64, 84>; // 256 x i64 vector value
-
-def v1i128 : VTVec<1, i128, 85>; // 1 x i128 vector value
-
-def v1f16 : VTVec<1, f16, 86>; // 1 x f16 vector value
-def v2f16 : VTVec<2, f16, 87>; // 2 x f16 vector value
-def v3f16 : VTVec<3, f16, 88>; // 3 x f16 vector value
-def v4f16 : VTVec<4, f16, 89>; // 4 x f16 vector value
-def v8f16 : VTVec<8, f16, 90>; // 8 x f16 vector value
-def v16f16 : VTVec<16, f16, 91>; // 16 x f16 vector value
-def v32f16 : VTVec<32, f16, 92>; // 32 x f16 vector value
-def v64f16 : VTVec<64, f16, 93>; // 64 x f16 vector value
-def v128f16 : VTVec<128, f16, 94>; // 128 x f16 vector value
-def v256f16 : VTVec<256, f16, 95>; // 256 x f16 vector value
-def v512f16 : VTVec<512, f16, 96>; // 512 x f16 vector value
-
-def v2bf16 : VTVec<2, bf16, 97>; // 2 x bf16 vector value
-def v3bf16 : VTVec<3, bf16, 98>; // 3 x bf16 vector value
-def v4bf16 : VTVec<4, bf16, 99>; // 4 x bf16 vector value
-def v8bf16 : VTVec<8, bf16, 100>; // 8 x bf16 vector value
-def v16bf16 : VTVec<16, bf16, 101>; // 16 x bf16 vector value
-def v32bf16 : VTVec<32, bf16, 102>; // 32 x bf16 vector value
-def v64bf16 : VTVec<64, bf16, 103>; // 64 x bf16 vector value
-def v128bf16 : VTVec<128, bf16, 104>; // 128 x bf16 vector value
-
-def v1f32 : VTVec<1, f32, 105>; // 1 x f32 vector value
-def v2f32 : VTVec<2, f32, 106>; // 2 x f32 vector value
-def v3f32 : VTVec<3, f32, 107>; // 3 x f32 vector value
-def v4f32 : VTVec<4, f32, 108>; // 4 x f32 vector value
-def v5f32 : VTVec<5, f32, 109>; // 5 x f32 vector value
-def v6f32 : VTVec<6, f32, 110>; // 6 x f32 vector value
-def v7f32 : VTVec<7, f32, 111>; // 7 x f32 vector value
-def v8f32 : VTVec<8, f32, 112>; // 8 x f32 vector value
-def v9f32 : VTVec<9, f32, 113>; // 9 x f32 vector value
-def v10f32 : VTVec<10, f32, 114>; // 10 x f32 vector value
-def v11f32 : VTVec<11, f32, 115>; // 11 x f32 vector value
-def v12f32 : VTVec<12, f32, 116>; // 12 x f32 vector value
-def v16f32 : VTVec<16, f32, 117>; // 16 x f32 vector value
-def v32f32 : VTVec<32, f32, 118>; // 32 x f32 vector value
-def v64f32 : VTVec<64, f32, 119>; // 64 x f32 vector value
-def v128f32 : VTVec<128, f32, 120>; // 128 x f32 vector value
-def v256f32 : VTVec<256, f32, 121>; // 256 x f32 vector value
-def v512f32 : VTVec<512, f32, 122>; // 512 x f32 vector value
-def v1024f32 : VTVec<1024, f32, 123>; // 1024 x f32 vector value
-def v2048f32 : VTVec<2048, f32, 124>; // 2048 x f32 vector value
-
-def v1f64 : VTVec<1, f64, 125>; // 1 x f64 vector value
-def v2f64 : VTVec<2, f64, 126>; // 2 x f64 vector value
-def v3f64 : VTVec<3, f64, 127>; // 3 x f64 vector value
-def v4f64 : VTVec<4, f64, 128>; // 4 x f64 vector value
-def v8f64 : VTVec<8, f64, 129>; // 8 x f64 vector value
-def v16f64 : VTVec<16, f64, 130>; // 16 x f64 vector value
-def v32f64 : VTVec<32, f64, 131>; // 32 x f64 vector value
-def v64f64 : VTVec<64, f64, 132>; // 64 x f64 vector value
-def v128f64 : VTVec<128, f64, 133>; // 128 x f64 vector value
-def v256f64 : VTVec<256, f64, 134>; // 256 x f64 vector value
-
-def nxv1i1 : VTScalableVec<1, i1, 135>; // n x 1 x i1 vector value
-def nxv2i1 : VTScalableVec<2, i1, 136>; // n x 2 x i1 vector value
-def nxv4i1 : VTScalableVec<4, i1, 137>; // n x 4 x i1 vector value
-def nxv8i1 : VTScalableVec<8, i1, 138>; // n x 8 x i1 vector value
-def nxv16i1 : VTScalableVec<16, i1, 139>; // n x 16 x i1 vector value
-def nxv32i1 : VTScalableVec<32, i1, 140>; // n x 32 x i1 vector value
-def nxv64i1 : VTScalableVec<64, i1, 141>; // n x 64 x i1 vector value
-
-def nxv1i8 : VTScalableVec<1, i8, 142>; // n x 1 x i8 vector value
-def nxv2i8 : VTScalableVec<2, i8, 143>; // n x 2 x i8 vector value
-def nxv4i8 : VTScalableVec<4, i8, 144>; // n x 4 x i8 vector value
-def nxv8i8 : VTScalableVec<8, i8, 145>; // n x 8 x i8 vector value
-def nxv16i8 : VTScalableVec<16, i8, 146>; // n x 16 x i8 vector value
-def nxv32i8 : VTScalableVec<32, i8, 147>; // n x 32 x i8 vector value
-def nxv64i8 : VTScalableVec<64, i8, 148>; // n x 64 x i8 vector value
-
-def nxv1i16 : VTScalableVec<1, i16, 149>; // n x 1 x i16 vector value
-def nxv2i16 : VTScalableVec<2, i16, 150>; // n x 2 x i16 vector value
-def nxv4i16 : VTScalableVec<4, i16, 151>; // n x 4 x i16 vector value
-def nxv8i16 : VTScalableVec<8, i16, 152>; // n x 8 x i16 vector value
-def nxv16i16 : VTScalableVec<16, i16, 153>; // n x 16 x i16 vector value
-def nxv32i16 : VTScalableVec<32, i16, 154>; // n x 32 x i16 vector value
-
-def nxv1i32 : VTScalableVec<1, i32, 155>; // n x 1 x i32 vector value
-def nxv2i32 : VTScalableVec<2, i32, 156>; // n x 2 x i32 vector value
-def nxv4i32 : VTScalableVec<4, i32, 157>; // n x 4 x i32 vector value
-def nxv8i32 : VTScalableVec<8, i32, 158>; // n x 8 x i32 vector value
-def nxv16i32 : VTScalableVec<16, i32, 159>; // n x 16 x i32 vector value
-def nxv32i32 : VTScalableVec<32, i32, 160>; // n x 32 x i32 vector value
-
-def nxv1i64 : VTScalableVec<1, i64, 161>; // n x 1 x i64 vector value
-def nxv2i64 : VTScalableVec<2, i64, 162>; // n x 2 x i64 vector value
-def nxv4i64 : VTScalableVec<4, i64, 163>; // n x 4 x i64 vector value
-def nxv8i64 : VTScalableVec<8, i64, 164>; // n x 8 x i64 vector value
-def nxv16i64 : VTScalableVec<16, i64, 165>; // n x 16 x i64 vector value
-def nxv32i64 : VTScalableVec<32, i64, 166>; // n x 32 x i64 vector value
-
-def nxv1f16 : VTScalableVec<1, f16, 167>; // n x 1 x f16 vector value
-def nxv2f16 : VTScalableVec<2, f16, 168>; // n x 2 x f16 vector value
-def nxv4f16 : VTScalableVec<4, f16, 169>; // n x 4 x f16 vector value
-def nxv8f16 : VTScalableVec<8, f16, 170>; // n x 8 x f16 vector value
-def nxv16f16 : VTScalableVec<16, f16, 171>; // n x 16 x f16 vector value
-def nxv32f16 : VTScalableVec<32, f16, 172>; // n x 32 x f16 vector value
-
-def nxv1bf16 : VTScalableVec<1, bf16, 173>; // n x 1 x bf16 vector value
-def nxv2bf16 : VTScalableVec<2, bf16, 174>; // n x 2 x bf16 vector value
-def nxv4bf16 : VTScalableVec<4, bf16, 175>; // n x 4 x bf16 vector value
-def nxv8bf16 : VTScalableVec<8, bf16, 176>; // n x 8 x bf16 vector value
-def nxv16bf16 : VTScalableVec<16, bf16, 177>; // n x 16 x bf16 vector value
-def nxv32bf16 : VTScalableVec<32, bf16, 178>; // n x 32 x bf16 vector value
-
-def nxv1f32 : VTScalableVec<1, f32, 179>; // n x 1 x f32 vector value
-def nxv2f32 : VTScalableVec<2, f32, 180>; // n x 2 x f32 vector value
-def nxv4f32 : VTScalableVec<4, f32, 181>; // n x 4 x f32 vector value
-def nxv8f32 : VTScalableVec<8, f32, 182>; // n x 8 x f32 vector value
-def nxv16f32 : VTScalableVec<16, f32, 183>; // n x 16 x f32 vector value
-
-def nxv1f64 : VTScalableVec<1, f64, 184>; // n x 1 x f64 vector value
-def nxv2f64 : VTScalableVec<2, f64, 185>; // n x 2 x f64 vector value
-def nxv4f64 : VTScalableVec<4, f64, 186>; // n x 4 x f64 vector value
-def nxv8f64 : VTScalableVec<8, f64, 187>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64, 188>; // X86 MMX value
-def FlagVT : ValueType<0, 189> { // Pre-RA sched glue
+def i1 : VTIntOrd<1, OtherVT>; // One bit boolean value
+def i2 : VTIntOrd<2, i1>; // 2-bit integer value
+def i4 : VTIntOrd<4, i2>; // 4-bit integer value
+def i8 : VTIntOrd<8, i4>; // 8-bit integer value
+def i16 : VTIntOrd<16, i8>; // 16-bit integer value
+def i32 : VTIntOrd<32, i16>; // 32-bit integer value
+def i64 : VTIntOrd<64, i32>; // 64-bit integer value
+def i128 : VTIntOrd<128, i64>; // 128-bit integer value
+
+def bf16 : VTFPOrd<16, i128>; // 16-bit brain floating point value
+def f16 : VTFPOrd<16, bf16>; // 16-bit floating point value
+def f32 : VTFPOrd<32, f16>; // 32-bit floating point value
+def f64 : VTFPOrd<64, f32>; // 64-bit floating point value
+def f80 : VTFPOrd<80, f64>; // 80-bit floating point value
+def f128 : VTFPOrd<128, f80>; // 128-bit floating point value
+def ppcf128 : VTFPOrd<128, f128>; // PPC 128-bit floating point value
+
+def v1i1 : VTVecOrd<1, i1, ppcf128>; // 1 x i1 vector value
+def v2i1 : VTVecOrd<2, i1, v1i1 >; // 2 x i1 vector value
+def v4i1 : VTVecOrd<4, i1, v2i1 >; // 4 x i1 vector value
+def v8i1 : VTVecOrd<8, i1, v4i1 >; // 8 x i1 vector value
+def v16i1 : VTVecOrd<16, i1, v8i1 >; // 16 x i1 vector value
+def v32i1 : VTVecOrd<32, i1, v16i1 >; // 32 x i1 vector value
+def v64i1 : VTVecOrd<64, i1, v32i1 >; // 64 x i1 vector value
+def v128i1 : VTVecOrd<128, i1, v64i1 >; // 128 x i1 vector value
+def v256i1 : VTVecOrd<256, i1, v128i1 >; // 256 x i1 vector value
+def v512i1 : VTVecOrd<512, i1, v256i1 >; // 512 x i1 vector value
+def v1024i1 : VTVecOrd<1024, i1, v512i1 >; // 1024 x i1 vector value
+def v2048i1 : VTVecOrd<2048, i1, v1024i1>; // 2048 x i1 vector value
+
+def v128i2 : VTVecOrd<128, i2, v2048i1>; // 128 x i2 vector value
+def v256i2 : VTVecOrd<256, i2, v128i2 >; // 256 x i2 vector value
+
+def v64i4 : VTVecOrd<64, i4, v256i2 >; // 64 x i4 vector value
+def v128i4 : VTVecOrd<128, i4, v64i4 >; // 128 x i4 vector value
+
+def v1i8 : VTVecOrd<1, i8, v128i4>; // 1 x i8 vector value
+def v2i8 : VTVecOrd<2, i8, v1i8 >; // 2 x i8 vector value
+def v4i8 : VTVecOrd<4, i8, v2i8 >; // 4 x i8 vector value
+def v8i8 : VTVecOrd<8, i8, v4i8 >; // 8 x i8 vector value
+def v16i8 : VTVecOrd<16, i8, v8i8 >; // 16 x i8 vector value
+def v32i8 : VTVecOrd<32, i8, v16i8 >; // 32 x i8 vector value
+def v64i8 : VTVecOrd<64, i8, v32i8 >; // 64 x i8 vector value
+def v128i8 : VTVecOrd<128, i8, v64i8 >; // 128 x i8 vector value
+def v256i8 : VTVecOrd<256, i8, v128i8>; // 256 x i8 vector value
+def v512i8 : VTVecOrd<512, i8, v256i8>; // 512 x i8 vector value
+def v1024i8 : VTVecOrd<1024, i8, v512i8>; // 1024 x i8 vector value
+
+def v1i16 : VTVecOrd<1, i16, v1024i8 >; // 1 x i16 vector value
+def v2i16 : VTVecOrd<2, i16, v1i16 >; // 2 x i16 vector value
+def v3i16 : VTVecOrd<3, i16, v2i16 >; // 3 x i16 vector value
+def v4i16 : VTVecOrd<4, i16, v3i16 >; // 4 x i16 vector value
+def v8i16 : VTVecOrd<8, i16, v4i16 >; // 8 x i16 vector value
+def v16i16 : VTVecOrd<16, i16, v8i16 >; // 16 x i16 vector value
+def v32i16 : VTVecOrd<32, i16, v16i16 >; // 32 x i16 vector value
+def v64i16 : VTVecOrd<64, i16, v32i16 >; // 64 x i16 vector value
+def v128i16 : VTVecOrd<128, i16, v64i16 >; // 128 x i16 vector value
+def v256i16 : VTVecOrd<256, i16, v128i16 >; // 256 x i16 vector value
+def v512i16 : VTVecOrd<512, i16, v256i16 >; // 512 x i16 vector value
+
+def v1i32 : VTVecOrd<1, i32, v512i16>; // 1 x i32 vector value
+def v2i32 : VTVecOrd<2, i32, v1i32 >; // 2 x i32 vector value
+def v3i32 : VTVecOrd<3, i32, v2i32 >; // 3 x i32 vector value
+def v4i32 : VTVecOrd<4, i32, v3i32 >; // 4 x i32 vector value
+def v5i32 : VTVecOrd<5, i32, v4i32 >; // 5 x i32 vector value
+def v6i32 : VTVecOrd<6, i32, v5i32 >; // 6 x i32 vector value
+def v7i32 : VTVecOrd<7, i32, v6i32 >; // 7 x i32 vector value
+def v8i32 : VTVecOrd<8, i32, v7i32 >; // 8 x i32 vector value
+def v9i32 : VTVecOrd<9, i32, v8i32 >; // 9 x i32 vector value
+def v10i32 : VTVecOrd<10, i32, v9i32 >; // 10 x i32 vector value
+def v11i32 : VTVecOrd<11, i32, v10i32 >; // 11 x i32 vector value
+def v12i32 : VTVecOrd<12, i32, v11i32 >; // 12 x i32 vector value
+def v16i32 : VTVecOrd<16, i32, v12i32 >; // 16 x i32 vector value
+def v32i32 : VTVecOrd<32, i32, v16i32 >; // 32 x i32 vector value
+def v64i32 : VTVecOrd<64, i32, v32i32 >; // 64 x i32 vector value
+def v128i32 : VTVecOrd<128, i32, v64i32 >; // 128 x i32 vector value
+def v256i32 : VTVecOrd<256, i32, v128i32 >; // 256 x i32 vector value
+def v512i32 : VTVecOrd<512, i32, v256i32 >; // 512 x i32 vector value
+def v1024i32 : VTVecOrd<1024, i32, v512i32 >; // 1024 x i32 vector value
+def v2048i32 : VTVecOrd<2048, i32, v1024i32>; // 2048 x i32 vector value
+
+def v1i64 : VTVecOrd<1, i64, v2048i32>; // 1 x i64 vector value
+def v2i64 : VTVecOrd<2, i64, v1i64 >; // 2 x i64 vector value
+def v3i64 : VTVecOrd<3, i64, v2i64 >; // 3 x i64 vector value
+def v4i64 : VTVecOrd<4, i64, v3i64 >; // 4 x i64 vector value
+def v8i64 : VTVecOrd<8, i64, v4i64 >; // 8 x i64 vector value
+def v16i64 : VTVecOrd<16, i64, v8i64 >; // 16 x i64 vector value
+def v32i64 : VTVecOrd<32, i64, v16i64 >; // 32 x i64 vector value
+def v64i64 : VTVecOrd<64, i64, v32i64 >; // 64 x i64 vector value
+def v128i64 : VTVecOrd<128, i64, v64i64 >; // 128 x i64 vector value
+def v256i64 : VTVecOrd<256, i64, v128i64 >; // 256 x i64 vector value
+
+def v1i128 : VTVecOrd<1, i128, v256i64 >; // 1 x i128 vector value
+
+def v1f16 : VTVecOrd<1, f16, v1i128 >; // 1 x f16 vector value
+def v2f16 : VTVecOrd<2, f16, v1f16 >; // 2 x f16 vector value
+def v3f16 : VTVecOrd<3, f16, v2f16 >; // 3 x f16 vector value
+def v4f16 : VTVecOrd<4, f16, v3f16 >; // 4 x f16 vector value
+def v8f16 : VTVecOrd<8, f16, v4f16 >; // 8 x f16 vector value
+def v16f16 : VTVecOrd<16, f16, v8f16 >; // 16 x f16 vector value
+def v32f16 : VTVecOrd<32, f16, v16f16 >; // 32 x f16 vector value
+def v64f16 : VTVecOrd<64, f16, v32f16 >; // 64 x f16 vector value
+def v128f16 : VTVecOrd<128, f16, v64f16 >; // 128 x f16 vector value
+def v256f16 : VTVecOrd<256, f16, v128f16>; // 256 x f16 vector value
+def v512f16 : VTVecOrd<512, f16, v256f16>; // 512 x f16 vector value
+def v2bf16 : VTVecOrd<2, bf16, v512f16>; // 2 x bf16 vector value
+def v3bf16 : VTVecOrd<3, bf16, v2bf16 >; // 3 x bf16 vector value
+def v4bf16 : VTVecOrd<4, bf16, v3bf16 >; // 4 x bf16 vector value
+def v8bf16 : VTVecOrd<8, bf16, v4bf16 >; // 8 x bf16 vector value
+def v16bf16 : VTVecOrd<16, bf16, v8bf16 >; // 16 x bf16 vector value
+def v32bf16 : VTVecOrd<32, bf16, v16bf16 >; // 32 x bf16 vector value
+def v64bf16 : VTVecOrd<64, bf16, v32bf16 >; // 64 x bf16 vector value
+def v128bf16 : VTVecOrd<128, bf16, v64bf16 >; // 128 x bf16 vector value
+
+def v1f32 : VTVecOrd<1, f32, v128bf16>; // 1 x f32 vector value
+def v2f32 : VTVecOrd<2, f32, v1f32 >; // 2 x f32 vector value
+def v3f32 : VTVecOrd<3, f32, v2f32 >; // 3 x f32 vector value
+def v4f32 : VTVecOrd<4, f32, v3f32 >; // 4 x f32 vector value
+def v5f32 : VTVecOrd<5, f32, v4f32 >; // 5 x f32 vector value
+def v6f32 : VTVecOrd<6, f32, v5f32 >; // 6 x f32 vector value
+def v7f32 : VTVecOrd<7, f32, v6f32 >; // 7 x f32 vector value
+def v8f32 : VTVecOrd<8, f32, v7f32 >; // 8 x f32 vector value
+def v9f32 : VTVecOrd<9, f32, v8f32 >; // 9 x f32 vector value
+def v10f32 : VTVecOrd<10, f32, v9f32 >; // 10 x f32 vector value
+def v11f32 : VTVecOrd<11, f32, v10f32 >; // 11 x f32 vector value
+def v12f32 : VTVecOrd<12, f32, v11f32 >; // 12 x f32 vector value
+def v16f32 : VTVecOrd<16, f32, v12f32 >; // 16 x f32 vector value
+def v32f32 : VTVecOrd<32, f32, v16f32 >; // 32 x f32 vector value
+def v64f32 : VTVecOrd<64, f32, v32f32 >; // 64 x f32 vector value
+def v128f32 : VTVecOrd<128, f32, v64f32 >; // 128 x f32 vector value
+def v256f32 : VTVecOrd<256, f32, v128f32 >; // 256 x f32 vector value
+def v512f32 : VTVecOrd<512, f32, v256f32 >; // 512 x f32 vector value
+def v1024f32 : VTVecOrd<1024, f32, v512f32 >; // 1024 x f32 vector value
+def v2048f32 : VTVecOrd<2048, f32, v1024f32>; // 2048 x f32 vector value
+
+def v1f64 : VTVecOrd<1, f64, v2048f32>; // 1 x f64 vector value
+def v2f64 : VTVecOrd<2, f64, v1f64 >; // 2 x f64 vector value
+def v3f64 : VTVecOrd<3, f64, v2f64 >; // 3 x f64 vector value
+def v4f64 : VTVecOrd<4, f64, v3f64 >; // 4 x f64 vector value
+def v8f64 : VTVecOrd<8, f64, v4f64 >; // 8 x f64 vector value
+def v16f64 : VTVecOrd<16, f64, v8f64 >; // 16 x f64 vector value
+def v32f64 : VTVecOrd<32, f64, v16f64 >; // 32 x f64 vector value
+def v64f64 : VTVecOrd<64, f64, v32f64 >; // 64 x f64 vector value
+def v128f64 : VTVecOrd<128, f64, v64f64 >; // 128 x f64 vector value
+def v256f64 : VTVecOrd<256, f64, v128f64 >; // 256 x f64 vector value
+
+def nxv1i1 : VTScalVecOrd<1, i1, v256f64>; // n x 1 x i1 vector value
+def nxv2i1 : VTScalVecOrd<2, i1, nxv1i1 >; // n x 2 x i1 vector value
+def nxv4i1 : VTScalVecOrd<4, i1, nxv2i1 >; // n x 4 x i1 vector value
+def nxv8i1 : VTScalVecOrd<8, i1, nxv4i1 >; // n x 8 x i1 vector value
+def nxv16i1 : VTScalVecOrd<16, i1, nxv8i1 >; // n x 16 x i1 vector value
+def nxv32i1 : VTScalVecOrd<32, i1, nxv16i1 >; // n x 32 x i1 vector value
+def nxv64i1 : VTScalVecOrd<64, i1, nxv32i1 >; // n x 64 x i1 vector value
+def nxv1i8 : VTScalVecOrd<1, i8, nxv64i1 >; // n x 1 x i8 vector value
+def nxv2i8 : VTScalVecOrd<2, i8, nxv1i8 >; // n x 2 x i8 vector value
+def nxv4i8 : VTScalVecOrd<4, i8, nxv2i8 >; // n x 4 x i8 vector value
+def nxv8i8 : VTScalVecOrd<8, i8, nxv4i8 >; // n x 8 x i8 vector value
+def nxv16i8 : VTScalVecOrd<16, i8, nxv8i8 >; // n x 16 x i8 vector value
+def nxv32i8 : VTScalVecOrd<32, i8, nxv16i8 >; // n x 32 x i8 vector value
+def nxv64i8 : VTScalVecOrd<64, i8, nxv32i8 >; // n x 64 x i8 vector value
+
+def nxv1i16 : VTScalVecOrd<1, i16, nxv64i8 >; // n x 1 x i16 vector value
+def nxv2i16 : VTScalVecOrd<2, i16, nxv1i16 >; // n x 2 x i16 vector value
+def nxv4i16 : VTScalVecOrd<4, i16, nxv2i16 >; // n x 4 x i16 vector value
+def nxv8i16 : VTScalVecOrd<8, i16, nxv4i16 >; // n x 8 x i16 vector value
+def nxv16i16 : VTScalVecOrd<16, i16, nxv8i16 >; // n x 16 x i16 vector value
+def nxv32i16 : VTScalVecOrd<32, i16, nxv16i16>; // n x 32 x i16 vector value
+
+def nxv1i32 : VTScalVecOrd<1, i32, nxv32i16>; // n x 1 x i32 vector value
+def nxv2i32 : VTScalVecOrd<2, i32, nxv1i32 >; // n x 2 x i32 vector value
+def nxv4i32 : VTScalVecOrd<4, i32, nxv2i32 >; // n x 4 x i32 vector value
+def nxv8i32 : VTScalVecOrd<8, i32, nxv4i32 >; // n x 8 x i32 vector value
+def nxv16i32 : VTScalVecOrd<16, i32, nxv8i32 >; // n x 16 x i32 vector value
+def nxv32i32 : VTScalVecOrd<32, i32, nxv16i32>; // n x 32 x i32 vector value
+
+def nxv1i64 : VTScalVecOrd<1, i64, nxv32i32>; // n x 1 x i64 vector value
+def nxv2i64 : VTScalVecOrd<2, i64, nxv1i64 >; // n x 2 x i64 vector value
+def nxv4i64 : VTScalVecOrd<4, i64, nxv2i64 >; // n x 4 x i64 vector value
+def nxv8i64 : VTScalVecOrd<8, i64, nxv4i64 >; // n x 8 x i64 vector value
+def nxv16i64 : VTScalVecOrd<16, i64, nxv8i64 >; // n x 16 x i64 vector value
+def nxv32i64 : VTScalVecOrd<32, i64, nxv16i64>; // n x 32 x i64 vector value
+
+def nxv1f16 : VTScalVecOrd<1, f16, nxv32i64 >; // n x 1 x f16 vector value
+def nxv2f16 : VTScalVecOrd<2, f16, nxv1f16 >; // n x 2 x f16 vector value
+def nxv4f16 : VTScalVecOrd<4, f16, nxv2f16 >; // n x 4 x f16 vector value
+def nxv8f16 : VTScalVecOrd<8, f16, nxv4f16 >; // n x 8 x f16 vector value
+def nxv16f16 : VTScalVecOrd<16, f16, nxv8f16 >; // n x 16 x f16 vector value
+def nxv32f16 : VTScalVecOrd<32, f16, nxv16f16 >; // n x 32 x f16 vector value
+def nxv1bf16 : VTScalVecOrd<1, bf16, nxv32f16 >; // n x 1 x bf16 vector value
+def nxv2bf16 : VTScalVecOrd<2, bf16, nxv1bf16 >; // n x 2 x bf16 vector value
+def nxv4bf16 : VTScalVecOrd<4, bf16, nxv2bf16 >; // n x 4 x bf16 vector value
+def nxv8bf16 : VTScalVecOrd<8, bf16, nxv4bf16 >; // n x 8 x bf16 vector value
+def nxv16bf16 : VTScalVecOrd<16, bf16, nxv8bf16 >; // n x 16 x bf16 vector value
+def nxv32bf16 : VTScalVecOrd<32, bf16, nxv16bf16>; // n x 32 x bf16 vector value
+
+def nxv1f32 : VTScalVecOrd<1, f32, nxv32bf16>; // n x 1 x f32 vector value
+def nxv2f32 : VTScalVecOrd<2, f32, nxv1f32 >; // n x 2 x f32 vector value
+def nxv4f32 : VTScalVecOrd<4, f32, nxv2f32 >; // n x 4 x f32 vector value
+def nxv8f32 : VTScalVecOrd<8, f32, nxv4f32 >; // n x 8 x f32 vector value
+def nxv16f32 : VTScalVecOrd<16, f32, nxv8f32 >; // n x 16 x f32 vector value
+
+def nxv1f64 : VTScalVecOrd<1, f64, nxv16f32>; // n x 1 x f64 vector value
+def nxv2f64 : VTScalVecOrd<2, f64, nxv1f64 >; // n x 2 x f64 vector value
+def nxv4f64 : VTScalVecOrd<4, f64, nxv2f64 >; // n x 4 x f64 vector value
+def nxv8f64 : VTScalVecOrd<8, f64, nxv4f64 >; // n x 8 x f64 vector value
+
+def x86mmx : VTOrd<64, nxv8f64>; // X86 MMX value
+def FlagVT : VTOrd<0, x86mmx> { // Pre-RA sched glue
let LLVMName = "Glue";
}
-def isVoid : ValueType<0, 190>; // Produces no value
-def untyped : ValueType<8, 191> { // Produces an untyped value
+def isVoid : VTOrd<0, FlagVT>; // Produces no value
+def untyped : VTOrd<8, isVoid> { // Produces an untyped value
let LLVMName = "Untyped";
}
-def funcref : ValueType<0, 192>; // WebAssembly's funcref type
-def externref : ValueType<0, 193>; // WebAssembly's externref type
-def x86amx : ValueType<8192, 194>; // X86 AMX value
-def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64)
+def funcref : VTOrd<0, untyped>; // WebAssembly's funcref type
+def externref : VTOrd<0, funcref>; // WebAssembly's externref type
+def x86amx : VTOrd<8192, externref>; // X86 AMX value
+def i64x8 : VTOrd<512, x86amx>; // 8 Consecutive GPRs (AArch64)
def aarch64svcount
- : ValueType<16, 196>; // AArch64 predicate-as-counter
-def spirvbuiltin : ValueType<0, 197>; // SPIR-V's builtin type
-
-def token : ValueType<0, 248>; // TokenTy
-def MetadataVT : ValueType<0, 249> { // Metadata
- let LLVMName = "Metadata";
+ : VTOrd<16, i64x8>; // AArch64 predicate-as-counter
+def spirvbuiltin : VTOrd<0, aarch64svcount>; // SPIR-V's builtin type
+
+// The remaining types are not used by the legalizer, so their values are
+// meant to lie in the range [VALUETYPE_SIZE, MAX_ALLOWED_VALUETYPE). The
+// token type is used as the anchor: if the list above grows past token's
+// current value of 248, only token's Value needs to be updated so that the
+// rest of the list fills out the last available values. In practice, that
+// means decrementing token when a VT is added after it, and increasing it
+// by 64 when VALUETYPE_SIZE grows past token's current value of 248.
+let ForLegalizer = false in {
+ def token : ValueType<0, 248>; // TokenTy
+ def MetadataVT : VTOrd<0, token> { // Metadata
+ let LLVMName = "Metadata";
+ }
+
+ // Pseudo valuetype mapped to the current pointer size to any address space.
+ // Should only be used in TableGen.
+ def iPTRAny : VTAnyOrd<MetadataVT>;
+
+ // Pseudo valuetype to represent "vector of any size"
+ def vAny : VTAnyOrd<iPTRAny>;
+
+ // Pseudo valuetype to represent "float of any format"
+ def fAny : VTAnyOrd<vAny>;
+
+ // Pseudo valuetype to represent "integer of any bit width"
+ def iAny : VTAnyOrd<fAny>;
+
+ // Pseudo valuetype mapped to the current pointer size.
+ def iPTR : VTOrd<0, iAny>;
+
+ // Pseudo valuetype to represent "any type of any size".
+ def Any : VTAnyOrd<iPTR>;
}
-// Pseudo valuetype mapped to the current pointer size to any address space.
-// Should only be used in TableGen.
-def iPTRAny : VTAny<250>;
-
-// Pseudo valuetype to represent "vector of any size"
-def vAny : VTAny<251>;
-
-// Pseudo valuetype to represent "float of any format"
-def fAny : VTAny<252>;
-
-// Pseudo valuetype to represent "integer of any bit width"
-def iAny : VTAny<253>;
-
-// Pseudo valuetype mapped to the current pointer size.
-def iPTR : ValueType<0, 254>;
-
-// Pseudo valuetype to represent "any type of any size".
-def Any : VTAny<255>;
-
} // end defset ValueTypes
/// This class is for targets that want to use pointer types in patterns
@@ -320,4 +352,5 @@ def Any : VTAny<255>;
class PtrValueType <ValueType scalar, int addrspace> :
ValueType<scalar.Size, scalar.Value> {
int AddrSpace = addrspace;
+ let ForLegalizer = false;
}
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 2611fe06f55ca53..5c83d802382fc27 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -47,16 +47,17 @@ class CodeGenDAGPatterns;
/// Shared pointer for TreePatternNode.
using TreePatternNodePtr = IntrusiveRefCntPtr<TreePatternNode>;
-/// This represents a set of MVTs. Since the underlying type for the MVT
-/// is uint8_t, there are at most 256 values. To reduce the number of memory
+/// This represents a set of MVTs. There are at most MVT::MAX_ALLOWED_VALUETYPE
+/// MVTs, and that value is kept as low as possible while still covering all
+/// of the defined MVTs. To reduce the number of memory
/// allocations and deallocations, represent the set as a sequence of bits.
/// To reduce the allocations even further, make MachineValueTypeSet own
/// the storage and use std::array as the bit container.
struct MachineValueTypeSet {
static_assert(std::is_same<std::underlying_type_t<MVT::SimpleValueType>,
- uint8_t>::value,
- "Change uint8_t here to the SimpleValueType's type");
- static unsigned constexpr Capacity = std::numeric_limits<uint8_t>::max()+1;
+ uint16_t>::value,
+ "Change uint16_t here to the SimpleValueType's type");
+ static unsigned constexpr Capacity = MVT::MAX_ALLOWED_VALUETYPE;
using WordType = uint64_t;
static unsigned constexpr WordWidth = CHAR_BIT*sizeof(WordType);
static unsigned constexpr NumWords = Capacity/WordWidth;
diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp
index 5ec1f59318f7847..a082a9a689ae927 100644
--- a/llvm/utils/TableGen/VTEmitter.cpp
+++ b/llvm/utils/TableGen/VTEmitter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include <limits>
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
@@ -32,13 +33,15 @@ class VTEmitter {
void VTEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("ValueTypes Source Fragment", OS, Records);
- std::array<const Record *, 256> VTsByNumber = {};
+ std::vector<const Record *> VTsByNumber(256);
auto ValueTypes = Records.getAllDerivedDefinitions("ValueType");
for (auto *VT : ValueTypes) {
auto Number = VT->getValueAsInt("Value");
- assert(0 <= Number && Number < (int)VTsByNumber.size() &&
- "ValueType should be uint8_t");
-    assert(!VTsByNumber[Number] && "Duplicate ValueType");
+    assert(0 <= Number && Number <= std::numeric_limits<uint16_t>::max() &&
+           "ValueType should be uint16_t");
+    if ((size_t)Number >= VTsByNumber.size())
+      VTsByNumber.resize(Number + 1);
+    assert(!VTsByNumber[Number] && "Duplicate ValueType");
VTsByNumber[Number] = VT;
}
@@ -72,6 +75,7 @@ void VTEmitter::run(raw_ostream &OS) {
bool IsFP = VT->getValueAsInt("isFP");
bool IsVector = VT->getValueAsInt("isVector");
bool IsScalable = VT->getValueAsInt("isScalable");
+ bool ForLegalizer = VT->getValueAsBit("ForLegalizer");
UpdateVTRange("INTEGER_FIXEDLEN_VECTOR_VALUETYPE", Name,
IsInteger && IsVector && !IsScalable);
@@ -85,7 +89,7 @@ void VTEmitter::run(raw_ostream &OS) {
UpdateVTRange("VECTOR_VALUETYPE", Name, IsVector);
UpdateVTRange("INTEGER_VALUETYPE", Name, IsInteger && !IsVector);
UpdateVTRange("FP_VALUETYPE", Name, IsFP && !IsVector);
- UpdateVTRange("VALUETYPE", Name, Value < 224);
+ UpdateVTRange("VALUETYPE", Name, ForLegalizer);
// clang-format off
OS << " GET_VT_ATTR("