[llvm] [X86] Support EGPR (R16-R31) for APX (PR #70958)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 1 09:56:06 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Shengchen Kan (KanRobert)
<details>
<summary>Changes</summary>
1. Map R16-R31 to DWARF registers 130-145.
2. Make R16-R31 caller-saved registers.
3. Make R16-31 allocatable only when feature EGPR is supported
4. Make R16-31 availabe for instructions in legacy maps 0/1 and EVEX
space, except XSAVE*/XRSTOR
RFC:
https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
Explanations for some seemingly unrelated changes:
inline-asm-registers.mir, statepoint-invoke-ra-enter-at-end.mir:
The immediate (TargetInstrInfo.cpp:1612) used for the regdef/reguse is
the encoding for the register
class in the enum generated by tablegen. This encoding will change
any time a new register class is added. Since the number is part
of the input, this means it can become stale.
seh-directive-errors.s:
R16-R31 makes ".seh_pushreg 17" legal
musttail-varargs.ll:
It seems some LLVM passes use the number of registers rather the number
of allocatable registers as heuristic.
---
Patch is 77.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70958.diff
17 Files Affected:
- (modified) llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h (+37)
- (modified) llvm/lib/Target/X86/X86.td (+2)
- (modified) llvm/lib/Target/X86/X86InstrInfo.cpp (+31)
- (modified) llvm/lib/Target/X86/X86InstrInfo.h (+11)
- (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (+29-4)
- (modified) llvm/lib/Target/X86/X86RegisterInfo.td (+188-15)
- (modified) llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir (+4-4)
- (added) llvm/test/CodeGen/X86/apx/mul-i1024.ll (+1039)
- (added) llvm/test/CodeGen/X86/apx/no-rex2-general.ll (+81)
- (added) llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll (+18)
- (added) llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll (+18)
- (added) llvm/test/CodeGen/X86/apx/no-rex2-special.ll (+70)
- (modified) llvm/test/CodeGen/X86/ipra-reg-usage.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/musttail-varargs.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir (+2-2)
- (modified) llvm/test/MC/AsmParser/seh-directive-errors.s (+1-1)
- (added) llvm/test/MC/X86/apx/cfi-reg.s (+41)
``````````diff
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index e6db840c0802091..3ccc73398064b76 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -1237,6 +1237,43 @@ namespace X86II {
return false;
}
+ inline bool canUseApxExtendedReg(const MCInstrDesc &Desc) {
+ uint64_t TSFlags = Desc.TSFlags;
+ uint64_t Encoding = TSFlags & EncodingMask;
+ // EVEX can always use egpr.
+ if (Encoding == X86II::EVEX)
+ return true;
+
+ // To be conservative, egpr is not used for all pseudo instructions
+ // because we are not sure what instruction it will become.
+ // FIXME: Could we improve it in X86ExpandPseudo?
+ if (isPseudo(TSFlags))
+ return false;
+
+ // MAP OB/TB in legacy encoding space can always use egpr except
+ // XSAVE*/XRSTOR*.
+ unsigned Opcode = Desc.Opcode;
+ switch (Opcode) {
+ default:
+ break;
+ case X86::XSAVE:
+ case X86::XSAVE64:
+ case X86::XSAVEOPT:
+ case X86::XSAVEOPT64:
+ case X86::XSAVEC:
+ case X86::XSAVEC64:
+ case X86::XSAVES:
+ case X86::XSAVES64:
+ case X86::XRSTOR:
+ case X86::XRSTOR64:
+ case X86::XRSTORS:
+ case X86::XRSTORS64:
+ return false;
+ }
+ uint64_t OpMap = TSFlags & X86II::OpMapMask;
+ return !Encoding && (OpMap == X86II::OB || OpMap == X86II::TB);
+ }
+
/// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher)
/// registers, e.g. zmm21, etc.
static inline bool is32ExtendedReg(unsigned RegNo) {
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index e2935a687f98b5e..ade175d99c89a8d 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -341,6 +341,8 @@ def FeatureAVX10_1 : SubtargetFeature<"avx10.1-256", "HasAVX10_1", "true",
def FeatureAVX10_1_512 : SubtargetFeature<"avx10.1-512", "HasAVX10_1_512", "true",
"Support AVX10.1 up to 512-bit instruction",
[FeatureAVX10_1, FeatureEVEX512]>;
+def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
+ "Support extended general purpose register">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4c6854da0ada3d2..56e3ac79b5957a1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -92,6 +92,37 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Subtarget(STI), RI(STI.getTargetTriple()) {
}
+const TargetRegisterClass *
+X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
+ // If the target does not have egpr, then r16-r31 will be resereved for all
+ // instructions.
+ if (!RC || !Subtarget.hasEGPR())
+ return RC;
+
+ if (X86II::canUseApxExtendedReg(MCID))
+ return RC;
+
+ switch (RC->getID()) {
+ default:
+ return RC;
+ case X86::GR8RegClassID:
+ return &X86::GR8_NOREX2RegClass;
+ case X86::GR16RegClassID:
+ return &X86::GR16_NOREX2RegClass;
+ case X86::GR32RegClassID:
+ return &X86::GR32_NOREX2RegClass;
+ case X86::GR64RegClassID:
+ return &X86::GR64_NOREX2RegClass;
+ case X86::GR32_NOSPRegClassID:
+ return &X86::GR32_NOREX2_NOSPRegClass;
+ case X86::GR64_NOSPRegClassID:
+ return &X86::GR64_NOREX2_NOSPRegClass;
+ }
+}
+
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e1199e20c318e24..b0a2d2b89074348 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -150,6 +150,17 @@ class X86InstrInfo final : public X86GenInstrInfo {
public:
explicit X86InstrInfo(X86Subtarget &STI);
+ /// Given a machine instruction descriptor, returns the register
+ /// class constraint for OpNum, or NULL. Returned register class
+ /// may be different from the definition in the TD file, e.g.
+ /// GR*RegClass (definition in TD file)
+ /// ->
+ /// GR*_NOREX2RegClass (Returned register class)
+ const TargetRegisterClass *
+ getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const override;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 4fd8b6d17e862e0..901dcf823d6d15b 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -158,6 +158,10 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::GR16RegClassID:
case X86::GR32RegClassID:
case X86::GR64RegClassID:
+ case X86::GR8_NOREX2RegClassID:
+ case X86::GR16_NOREX2RegClassID:
+ case X86::GR32_NOREX2RegClassID:
+ case X86::GR64_NOREX2RegClassID:
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
@@ -611,6 +615,14 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ // Reserve the extended general purpose registers.
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) {
+ for (unsigned n = 0; n != 16; ++n) {
+ for (MCRegAliasIterator AI(X86::R16 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+ }
+
assert(checkAllSuperRegsMarked(Reserved,
{X86::SIL, X86::DIL, X86::BPL, X86::SPL,
X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
@@ -629,12 +641,25 @@ unsigned X86RegisterInfo::getNumSupportedRegs(const MachineFunction &MF) const {
// APX registers (R16-R31)
//
// and try to return the minimum number of registers supported by the target.
-
assert((X86::R15WH + 1 == X86 ::YMM0) && (X86::YMM15 + 1 == X86::K0) &&
- (X86::K6_K7 + 1 == X86::TMMCFG) &&
- (X86::TMM7 + 1 == X86::NUM_TARGET_REGS) &&
+ (X86::K6_K7 + 1 == X86::TMMCFG) && (X86::TMM7 + 1 == X86::R16) &&
+ (X86::R31WH + 1 == X86::NUM_TARGET_REGS) &&
"Register number may be incorrect");
- return X86::NUM_TARGET_REGS;
+
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ bool HasAVX = ST.hasAVX();
+ bool HasAVX512 = ST.hasAVX512();
+ bool HasAMX = ST.hasAMXTILE();
+ bool HasEGPR = ST.hasEGPR();
+ if (HasEGPR)
+ return X86::NUM_TARGET_REGS;
+ if (HasAMX)
+ return X86::TMM7 + 1;
+ if (HasAVX512)
+ return X86::K6_K7 + 1;
+ if (HasAVX)
+ return X86::YMM15 + 1;
+ return X86::YMM0;
}
bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF,
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 898a3f97e5236df..166024bf3b53fe1 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -73,6 +73,44 @@ def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
+// RAGreedy prefers to select a cheaper register
+// For x86,
+// Cost(caller-save reg) < Cost(callee-save reg)
+// b/c callee-save register needs push/pop in prolog/epilog.
+// If both registers are callee-saved or caller-saved,
+// Cost(short-encoding reg) < Cost(long-encoding reg)
+//
+// To achieve this, we do the following things:
+// 1. Set CostPerUse=1 for registers that need prefix
+// 2. Consider callee-save register is never cheaper than a register w/ cost 1
+// 3. List caller-save register before callee-save regsiter in RegisterClass
+// or AllocationOrder
+//
+// NOTE:
+// D133902 stopped assigning register costs for R8-R15, which brought gain
+// and regression. We don't know if we should assign cost to R16-R31 w/o
+// performance data.
+// TODO:
+// Update the comment/cost after tuning.
+// APX only, requires REX2 or EVEX.
+let PositionOrder = 4 in {
+def R16B : X86Reg<"r16b", 16>;
+def R17B : X86Reg<"r17b", 17>;
+def R18B : X86Reg<"r18b", 18>;
+def R19B : X86Reg<"r19b", 19>;
+def R20B : X86Reg<"r20b", 20>;
+def R21B : X86Reg<"r21b", 21>;
+def R22B : X86Reg<"r22b", 22>;
+def R23B : X86Reg<"r23b", 23>;
+def R24B : X86Reg<"r24b", 24>;
+def R25B : X86Reg<"r25b", 25>;
+def R26B : X86Reg<"r26b", 26>;
+def R27B : X86Reg<"r27b", 27>;
+def R28B : X86Reg<"r28b", 28>;
+def R29B : X86Reg<"r29b", 29>;
+def R30B : X86Reg<"r30b", 30>;
+def R31B : X86Reg<"r31b", 31>;
+}
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
@@ -88,6 +126,24 @@ def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
+let PositionOrder = 4 in {
+def R16BH : X86Reg<"", -1>;
+def R17BH : X86Reg<"", -1>;
+def R18BH : X86Reg<"", -1>;
+def R19BH : X86Reg<"", -1>;
+def R20BH : X86Reg<"", -1>;
+def R21BH : X86Reg<"", -1>;
+def R22BH : X86Reg<"", -1>;
+def R23BH : X86Reg<"", -1>;
+def R24BH : X86Reg<"", -1>;
+def R25BH : X86Reg<"", -1>;
+def R26BH : X86Reg<"", -1>;
+def R27BH : X86Reg<"", -1>;
+def R28BH : X86Reg<"", -1>;
+def R29BH : X86Reg<"", -1>;
+def R30BH : X86Reg<"", -1>;
+def R31BH : X86Reg<"", -1>;
+}
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
@@ -106,6 +162,24 @@ def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
+let PositionOrder = 4 in {
+def R16WH : X86Reg<"", -1>;
+def R17WH : X86Reg<"", -1>;
+def R18WH : X86Reg<"", -1>;
+def R19WH : X86Reg<"", -1>;
+def R20WH : X86Reg<"", -1>;
+def R21WH : X86Reg<"", -1>;
+def R22WH : X86Reg<"", -1>;
+def R23WH : X86Reg<"", -1>;
+def R24WH : X86Reg<"", -1>;
+def R25WH : X86Reg<"", -1>;
+def R26WH : X86Reg<"", -1>;
+def R27WH : X86Reg<"", -1>;
+def R28WH : X86Reg<"", -1>;
+def R29WH : X86Reg<"", -1>;
+def R30WH : X86Reg<"", -1>;
+def R31WH : X86Reg<"", -1>;
+}
}
// 16-bit registers
@@ -134,6 +208,27 @@ def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
+let PositionOrder = 4 in {
+def R16W : X86Reg<"r16w", 16, [R16B,R16BH]>;
+def R17W : X86Reg<"r17w", 17, [R17B,R17BH]>;
+def R18W : X86Reg<"r18w", 18, [R18B,R18BH]>;
+def R19W : X86Reg<"r19w", 19, [R19B,R19BH]>;
+def R20W : X86Reg<"r20w", 20, [R20B,R20BH]>;
+def R21W : X86Reg<"r21w", 21, [R21B,R21BH]>;
+def R22W : X86Reg<"r22w", 22, [R22B,R22BH]>;
+def R23W : X86Reg<"r23w", 23, [R23B,R23BH]>;
+def R24W : X86Reg<"r24w", 24, [R24B,R24BH]>;
+def R25W : X86Reg<"r25w", 25, [R25B,R25BH]>;
+def R26W : X86Reg<"r26w", 26, [R26B,R26BH]>;
+def R27W : X86Reg<"r27w", 27, [R27B,R27BH]>;
+def R28W : X86Reg<"r28w", 28, [R28B,R28BH]>;
+def R29W : X86Reg<"r29w", 29, [R29B,R29BH]>;
+def R30W : X86Reg<"r30w", 30, [R30B,R30BH]>;
+def R31W : X86Reg<"r31w", 31, [R31B,R31BH]>;
+}
+}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
@@ -160,6 +255,27 @@ def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
+let PositionOrder = 4 in {
+def R16D : X86Reg<"r16d", 16, [R16W,R16WH]>;
+def R17D : X86Reg<"r17d", 17, [R17W,R17WH]>;
+def R18D : X86Reg<"r18d", 18, [R18W,R18WH]>;
+def R19D : X86Reg<"r19d", 19, [R19W,R19WH]>;
+def R20D : X86Reg<"r20d", 20, [R20W,R20WH]>;
+def R21D : X86Reg<"r21d", 21, [R21W,R21WH]>;
+def R22D : X86Reg<"r22d", 22, [R22W,R22WH]>;
+def R23D : X86Reg<"r23d", 23, [R23W,R23WH]>;
+def R24D : X86Reg<"r24d", 24, [R24W,R24WH]>;
+def R25D : X86Reg<"r25d", 25, [R25W,R25WH]>;
+def R26D : X86Reg<"r26d", 26, [R26W,R26WH]>;
+def R27D : X86Reg<"r27d", 27, [R27W,R27WH]>;
+def R28D : X86Reg<"r28d", 28, [R28W,R28WH]>;
+def R29D : X86Reg<"r29d", 29, [R29W,R29WH]>;
+def R30D : X86Reg<"r30d", 30, [R30W,R30WH]>;
+def R31D : X86Reg<"r31d", 31, [R31W,R31WH]>;
+}
+}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
@@ -181,6 +297,25 @@ def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
+// APX only, requires REX2 or EVEX.
+let PositionOrder = 4 in {
+def R16 : X86Reg<"r16", 16, [R16D]>, DwarfRegNum<[130, -2, -2]>;
+def R17 : X86Reg<"r17", 17, [R17D]>, DwarfRegNum<[131, -2, -2]>;
+def R18 : X86Reg<"r18", 18, [R18D]>, DwarfRegNum<[132, -2, -2]>;
+def R19 : X86Reg<"r19", 19, [R19D]>, DwarfRegNum<[133, -2, -2]>;
+def R20 : X86Reg<"r20", 20, [R20D]>, DwarfRegNum<[134, -2, -2]>;
+def R21 : X86Reg<"r21", 21, [R21D]>, DwarfRegNum<[135, -2, -2]>;
+def R22 : X86Reg<"r22", 22, [R22D]>, DwarfRegNum<[136, -2, -2]>;
+def R23 : X86Reg<"r23", 23, [R23D]>, DwarfRegNum<[137, -2, -2]>;
+def R24 : X86Reg<"r24", 24, [R24D]>, DwarfRegNum<[138, -2, -2]>;
+def R25 : X86Reg<"r25", 25, [R25D]>, DwarfRegNum<[139, -2, -2]>;
+def R26 : X86Reg<"r26", 26, [R26D]>, DwarfRegNum<[140, -2, -2]>;
+def R27 : X86Reg<"r27", 27, [R27D]>, DwarfRegNum<[141, -2, -2]>;
+def R28 : X86Reg<"r28", 28, [R28D]>, DwarfRegNum<[142, -2, -2]>;
+def R29 : X86Reg<"r29", 29, [R29D]>, DwarfRegNum<[143, -2, -2]>;
+def R30 : X86Reg<"r30", 30, [R30D]>, DwarfRegNum<[144, -2, -2]>;
+def R31 : X86Reg<"r31", 31, [R31D]>, DwarfRegNum<[145, -2, -2]>;
+}
}
// MMX Registers. These are actually aliased to ST0 .. ST7
@@ -407,9 +542,11 @@ def SSP : X86Reg<"ssp", 0>;
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
-def GR8 : RegisterClass<"X86", [i8], 8,
+def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
+ R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
+ R30B, R31B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -417,23 +554,28 @@ def GR8 : RegisterClass<"X86", [i8], 8,
}
let isAllocatable = 0 in
-def GRH8 : RegisterClass<"X86", [i8], 8,
+def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
- R12BH, R13BH, R14BH, R15BH)>;
-
+ R12BH, R13BH, R14BH, R15BH, R16BH, R17BH, R18BH,
+ R19BH, R20BH, R21BH, R22BH, R23BH, R24BH, R25BH,
+ R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
- (add AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
+ (add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
+ R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W,
+ R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W,
+ R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
- (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
- R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
- R15WH)>;
-
+ (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP, R8WH,
+ R9WH, R10WH, R11WH, R12WH, R13WH, R14WH, R15WH, R16WH,
+ R17WH, R18WH, R19WH, R20WH, R21WH, R22WH, R23WH, R24WH,
+ R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
- (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
+ R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D,
+ R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
+ R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -441,8 +583,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
+ R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implict input registers.
@@ -508,6 +651,27 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
+// GeneratePressureSet = 0 here is a temporary workaround for lots of
+// LIT fail. Whether enabling in the future still needs discussion.
+let GeneratePressureSet = 0 in {
+// GR8_NOREX2 - GR8 registers which do not require a REX2 prefix.
+def GR8_NOREX2 : RegisterClass<"X86", [i8], 8,
+ (sub GR8, (sequence "R%uB", 16, 31))> {
+ let AltOrders = [(sub GR8_NOREX2, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX2 - GR16 registers which do not require a REX2 prefix.
+def GR16_NOREX2 : RegisterClass<"X86", [i16], 16,
+ (sub GR16, (sequence "R%uW", 16, 31))>;
+// GR32_NOREX2 - GR32 registers which do not require a REX2 prefix.
+def GR32_NOREX2 : RegisterClass<"X86", [i32], 32,
+ (sub GR32, (sequence "R%uD", 16, 31))>;
+// GR64_NOREX2 - GR64 registers which do not require a REX2 prefix.
+def GR64_NOREX2 : RegisterClass<"X86", [i64], 64,
+ (sub GR64, (sequence "R%u", 16, 31))>;
+}
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
@@ -523,6 +687,15 @@ def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
+let GeneratePressureSet = 0 in {
+// GR32_NOREX2_NOSP - GR32_NOREX2 registers except ESP.
+def GR32_NOREX2_NOSP : RegisterClass<"X86", [i32], 32,
+ (sub GR32_NOREX2, ESP)>;
+
+// GR64_NOREX2_NOSP - GR64_NOREX2 registers except RSP, RIP.
+def GR64_NOREX2_NOSP : RegisterClass<"X86", [i64], 64,
+ (sub GR64_NOREX2, RSP, RIP)>;
+}
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x86_64 ISA.
diff --git a/llvm/...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70958
More information about the llvm-commits
mailing list