[llvm] [X86] Support EGPR (R16-R31) for APX (PR #67702)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 28 09:43:34 PDT 2023
https://github.com/KanRobert created https://github.com/llvm/llvm-project/pull/67702
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1. Map R16-R31 to DWARF registers 130-145.
2. Make R16-R31 caller-saved registers.
3. Make R16-R31 allocatable only when the EGPR feature is supported.
4. Make R16-R31 available to instructions in legacy maps 0/1 and in EVEX space, except XSAVE*/XRSTOR* (items 1 and 3 are illustrated by the sketch after this list).
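A minimal sketch of how items 1 and 3 surface through the target APIs; the setup (an x86-64 MachineFunction named MF) is assumed, and the expected values are the ones listed above:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// MF is assumed to target x86-64; X86::R16 comes from the generated
// X86 register enum (X86MCTargetDesc.h).
void queryEgprState(const MachineFunction &MF) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  // Item 1: DWARF numbers 130-145 map to R16-R31, so 130 is expected here.
  int64_t DwarfNum = TRI->getDwarfRegNum(X86::R16, /*isEH=*/false);
  // Item 3: without +egpr (or outside 64-bit mode), R16-R31 are marked
  // reserved, so the register allocator never assigns them.
  bool Allocatable = !MF.getRegInfo().isReserved(X86::R16);
  (void)DwarfNum;
  (void)Allocatable;
}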
Explanations for some seemingly unrelated changes:
inline-asm-registers.mir, statepoint-invoke-ra-enter-at-end.mir:
The immediate (TargetInstrInfo.cpp:1612) used for the regdef/reguse operands encodes the register class's ID in the tablegen-generated enum. That encoding changes whenever a new register class is added, and since the number is written into the test input, it can become stale.
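For reference, a hedged sketch of the immediate's layout, going by the inline-asm operand flag encoding in llvm/IR/InlineAsm.h (the constants are the ones from the test diff below):

// Bits 0-2: operand kind (1 = reguse, 2 = regdef, 4 = clobber),
// bits 3-15: number of registers, bits 16-31: register class ID + 1
// (0 means no register class constraint).
unsigned kind(unsigned Flag) { return Flag & 7; }
unsigned numRegs(unsigned Flag) { return (Flag & 0xffff) >> 3; }
bool regClass(unsigned Flag, unsigned &RC) {
  unsigned High = Flag >> 16;
  if (!High)
    return false;
  RC = High - 1;
  return true;
}
// 4521994 and 4784138 (both "regdef:GR64", one register) differ only in
// the high half: inserting the NOREX2 classes shifted GR64's enum value,
// so the immediates in the MIR tests had to be regenerated.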
seh-directive-errors.s:
Adding R16-R31 makes ".seh_pushreg 17" legal, so the test now uses 32 to keep the register number out of range.
musttail-varargs.ll:
It seems some LLVM passes use the total number of registers, rather than the number of allocatable registers, as a heuristic.
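A hedged illustration of the kind of check that shifts here (hypothetical pass code, not a specific in-tree heuristic):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

// The total count grows by 16 as soon as R16-R31 exist, even while they
// are reserved; the allocatable count stays gated by +egpr.
bool pressureLooksHigh(const TargetRegisterInfo &TRI,
                       const TargetRegisterClass &RC,
                       const MachineFunction &MF, unsigned Threshold) {
  unsigned Total = RC.getNumRegs();                              // +16 now
  unsigned Allocatable = TRI.getAllocatableSet(MF, &RC).count(); // unchanged
  return Total > Threshold; // stale heuristic: should use Allocatable
}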
From 1935d2d8e7a051a5163d5547bf6a96e8dbf1f257 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Thu, 28 Sep 2023 14:03:18 +0800
Subject: [PATCH] [X86] Support EGPR (R16-R31) for APX
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
1. Map R16-R31 to DWARF registers 130-145.
2. Make R16-R31 caller-saved registers.
3. Make R16-R31 allocatable only when the EGPR feature is supported.
4. Make R16-R31 available to instructions in legacy maps 0/1 and in EVEX space, except XSAVE*/XRSTOR*.
Explanations for some seemingly unrelated changes:
inline-asm-registers.mir, statepoint-invoke-ra-enter-at-end.mir:
The immediate (TargetInstrInfo.cpp:1612) used for the regdef/reguse operands encodes the register class's ID in the tablegen-generated enum. That encoding changes whenever a new register class is added, and since the number is written into the test input, it can become stale.
seh-directive-errors.s:
Adding R16-R31 makes ".seh_pushreg 17" legal, so the test now uses 32 to keep the register number out of range.
musttail-varargs.ll:
It seems some LLVM passes use the total number of registers, rather than the number of allocatable registers, as a heuristic.
---
.../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 38 ++++
llvm/lib/Target/X86/X86.td | 2 +
llvm/lib/Target/X86/X86InstrInfo.cpp | 31 +++
llvm/lib/Target/X86/X86InstrInfo.h | 5 +
llvm/lib/Target/X86/X86RegisterInfo.cpp | 12 ++
llvm/lib/Target/X86/X86RegisterInfo.td | 189 ++++++++++++++++--
.../CodeGen/MIR/X86/inline-asm-registers.mir | 8 +-
llvm/test/CodeGen/X86/apx/no-rex2-general.ll | 81 ++++++++
.../CodeGen/X86/apx/no-rex2-pseudo-amx.ll | 18 ++
.../CodeGen/X86/apx/no-rex2-pseudo-x87.ll | 18 ++
llvm/test/CodeGen/X86/apx/no-rex2-special.ll | 70 +++++++
llvm/test/CodeGen/X86/ipra-reg-usage.ll | 2 +-
llvm/test/CodeGen/X86/musttail-varargs.ll | 48 ++---
.../X86/statepoint-invoke-ra-enter-at-end.mir | 4 +-
llvm/test/MC/AsmParser/seh-directive-errors.s | 2 +-
llvm/test/MC/X86/apx/cfi-reg.s | 41 ++++
16 files changed, 524 insertions(+), 45 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/no-rex2-general.ll
create mode 100644 llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll
create mode 100644 llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll
create mode 100644 llvm/test/CodeGen/X86/apx/no-rex2-special.ll
create mode 100644 llvm/test/MC/X86/apx/cfi-reg.s
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index e2293fe30561fb4..4c5b49fcc1bcd9b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -1208,6 +1208,44 @@ namespace X86II {
return false;
}
+ inline bool canUseApxExtendedReg(const MCInstrDesc &Desc) {
+ uint64_t TSFlags = Desc.TSFlags;
+ uint64_t Encoding = TSFlags & EncodingMask;
+ // EVEX can always use egpr.
+ if (Encoding == X86II::EVEX)
+ return true;
+
+ // MAP OB/TB in legacy encoding space can always use egpr except
+ // XSAVE*/XRSTOR*.
+ unsigned Opcode = Desc.Opcode;
+ bool IsSpecial = false;
+ switch (Opcode) {
+ default:
+ // To be conservative, egpr is not used for pseudo instructions because
+ // we are not sure what instructions they will eventually become.
+ // FIXME: Could we improve it in X86ExpandPseudo?
+ IsSpecial = isPseudo(TSFlags);
+ break;
+ case X86::XSAVE:
+ case X86::XSAVE64:
+ case X86::XSAVEOPT:
+ case X86::XSAVEOPT64:
+ case X86::XSAVEC:
+ case X86::XSAVEC64:
+ case X86::XSAVES:
+ case X86::XSAVES64:
+ case X86::XRSTOR:
+ case X86::XRSTOR64:
+ case X86::XRSTORS:
+ case X86::XRSTORS64:
+ IsSpecial = true;
+ break;
+ }
+ uint64_t OpMap = TSFlags & X86II::OpMapMask;
+ return !Encoding && (OpMap == X86II::OB || OpMap == X86II::TB) &&
+ !IsSpecial;
+ }
+
/// \returns true if the MemoryOperand is a 32 extended (zmm16 or higher)
/// registers, e.g. zmm21, etc.
static inline bool is32ExtendedReg(unsigned RegNo) {
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 64f91ae90e2b0ce..6c5fb49ff9f9504 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -331,6 +331,8 @@ def FeatureMOVDIRI : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
"Support movdiri instruction (direct store integer)">;
def FeatureMOVDIR64B : SubtargetFeature<"movdir64b", "HasMOVDIR64B", "true",
"Support movdir64b instruction (direct store 64 bytes)">;
+def FeatureEGPR : SubtargetFeature<"egpr", "HasEGPR", "true",
+ "Support extended general purpose register">;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 4320a0e94b7a71f..6d1da855761cdd5 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -92,6 +92,37 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
Subtarget(STI), RI(STI.getTargetTriple()) {
}
+const TargetRegisterClass *
+X86InstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const {
+ auto *RC = TargetInstrInfo::getRegClass(MCID, OpNum, TRI, MF);
+ // If the target does not have egpr, then r16-r31 will be reserved for all
+ // instructions.
+ if (!RC || !Subtarget.hasEGPR())
+ return RC;
+
+ if (X86II::canUseApxExtendedReg(MCID))
+ return RC;
+
+ switch (RC->getID()) {
+ default:
+ return RC;
+ case X86::GR8RegClassID:
+ return &X86::GR8_NOREX2RegClass;
+ case X86::GR16RegClassID:
+ return &X86::GR16_NOREX2RegClass;
+ case X86::GR32RegClassID:
+ return &X86::GR32_NOREX2RegClass;
+ case X86::GR64RegClassID:
+ return &X86::GR64_NOREX2RegClass;
+ case X86::GR32_NOSPRegClassID:
+ return &X86::GR32_NOREX2_NOSPRegClass;
+ case X86::GR64_NOSPRegClassID:
+ return &X86::GR64_NOREX2_NOSPRegClass;
+ }
+}
+
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 8119302f73e8b36..e106b9df8850ba1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -150,6 +150,11 @@ class X86InstrInfo final : public X86GenInstrInfo {
public:
explicit X86InstrInfo(X86Subtarget &STI);
+ const TargetRegisterClass *
+ getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction &MF) const override;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 3504ca2b5743f88..7fcc2a1acfd963d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -158,6 +158,10 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
case X86::GR16RegClassID:
case X86::GR32RegClassID:
case X86::GR64RegClassID:
+ case X86::GR8_NOREX2RegClassID:
+ case X86::GR16_NOREX2RegClassID:
+ case X86::GR32_NOREX2RegClassID:
+ case X86::GR64_NOREX2RegClassID:
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
@@ -610,6 +614,14 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ // Reserve the extended general purpose registers.
+ if (!Is64Bit || !MF.getSubtarget<X86Subtarget>().hasEGPR()) {
+ for (unsigned n = 0; n != 16; ++n) {
+ for (MCRegAliasIterator AI(X86::R16 + n, this, true); AI.isValid(); ++AI)
+ Reserved.set(*AI);
+ }
+ }
+
assert(checkAllSuperRegsMarked(Reserved,
{X86::SIL, X86::DIL, X86::BPL, X86::SPL,
X86::SIH, X86::DIH, X86::BPH, X86::SPH}));
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 1e6477e658b9d10..fe251b75e9c0c4e 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -73,6 +73,42 @@ def R12B : X86Reg<"r12b", 12>;
def R13B : X86Reg<"r13b", 13>;
def R14B : X86Reg<"r14b", 14>;
def R15B : X86Reg<"r15b", 15>;
+// RAGreedy prefers to select a cheaper register
+// For x86,
+// Cost(caller-save reg) < Cost(callee-save reg)
+// b/c callee-save register needs push/pop in prolog/epilog.
+// If both registers are callee-saved or caller-saved,
+// Cost(short-encoding reg) < Cost(long-encoding reg)
+//
+// To achieve this, we do the following things:
+// 1. Set CostPerUse=1 for registers that need prefix
+// 2. Consider a callee-save register never cheaper than a register w/ cost 1
+// 3. List caller-save registers before callee-save registers in RegisterClass
+// or AllocationOrder
+//
+// NOTE:
+// D133902 stopped assigning register costs for R8-R15, which brought both
+// gains and regressions. We don't know whether we should assign a cost to
+// R16-R31 w/o performance data.
+// TODO:
+// Update the comment/cost after tuning.
+// APX only, requires REX2 or EVEX.
+def R16B : X86Reg<"r16b", 16>;
+def R17B : X86Reg<"r17b", 17>;
+def R18B : X86Reg<"r18b", 18>;
+def R19B : X86Reg<"r19b", 19>;
+def R20B : X86Reg<"r20b", 20>;
+def R21B : X86Reg<"r21b", 21>;
+def R22B : X86Reg<"r22b", 22>;
+def R23B : X86Reg<"r23b", 23>;
+def R24B : X86Reg<"r24b", 24>;
+def R25B : X86Reg<"r25b", 25>;
+def R26B : X86Reg<"r26b", 26>;
+def R27B : X86Reg<"r27b", 27>;
+def R28B : X86Reg<"r28b", 28>;
+def R29B : X86Reg<"r29b", 29>;
+def R30B : X86Reg<"r30b", 30>;
+def R31B : X86Reg<"r31b", 31>;
let isArtificial = 1 in {
// High byte of the low 16 bits of the super-register:
@@ -88,6 +124,22 @@ def R12BH : X86Reg<"", -1>;
def R13BH : X86Reg<"", -1>;
def R14BH : X86Reg<"", -1>;
def R15BH : X86Reg<"", -1>;
+def R16BH : X86Reg<"", -1>;
+def R17BH : X86Reg<"", -1>;
+def R18BH : X86Reg<"", -1>;
+def R19BH : X86Reg<"", -1>;
+def R20BH : X86Reg<"", -1>;
+def R21BH : X86Reg<"", -1>;
+def R22BH : X86Reg<"", -1>;
+def R23BH : X86Reg<"", -1>;
+def R24BH : X86Reg<"", -1>;
+def R25BH : X86Reg<"", -1>;
+def R26BH : X86Reg<"", -1>;
+def R27BH : X86Reg<"", -1>;
+def R28BH : X86Reg<"", -1>;
+def R29BH : X86Reg<"", -1>;
+def R30BH : X86Reg<"", -1>;
+def R31BH : X86Reg<"", -1>;
// High word of the low 32 bits of the super-register:
def HAX : X86Reg<"", -1>;
def HDX : X86Reg<"", -1>;
@@ -106,6 +158,22 @@ def R12WH : X86Reg<"", -1>;
def R13WH : X86Reg<"", -1>;
def R14WH : X86Reg<"", -1>;
def R15WH : X86Reg<"", -1>;
+def R16WH : X86Reg<"", -1>;
+def R17WH : X86Reg<"", -1>;
+def R18WH : X86Reg<"", -1>;
+def R19WH : X86Reg<"", -1>;
+def R20WH : X86Reg<"", -1>;
+def R21WH : X86Reg<"", -1>;
+def R22WH : X86Reg<"", -1>;
+def R23WH : X86Reg<"", -1>;
+def R24WH : X86Reg<"", -1>;
+def R25WH : X86Reg<"", -1>;
+def R26WH : X86Reg<"", -1>;
+def R27WH : X86Reg<"", -1>;
+def R28WH : X86Reg<"", -1>;
+def R29WH : X86Reg<"", -1>;
+def R30WH : X86Reg<"", -1>;
+def R31WH : X86Reg<"", -1>;
}
// 16-bit registers
@@ -134,6 +202,25 @@ def R13W : X86Reg<"r13w", 13, [R13B,R13BH]>;
def R14W : X86Reg<"r14w", 14, [R14B,R14BH]>;
def R15W : X86Reg<"r15w", 15, [R15B,R15BH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_8bit, sub_8bit_hi_phony], CoveredBySubRegs = 1 in {
+def R16W : X86Reg<"r16w", 16, [R16B,R16BH]>;
+def R17W : X86Reg<"r17w", 17, [R17B,R17BH]>;
+def R18W : X86Reg<"r18w", 18, [R18B,R18BH]>;
+def R19W : X86Reg<"r19w", 19, [R19B,R19BH]>;
+def R20W : X86Reg<"r20w", 20, [R20B,R20BH]>;
+def R21W : X86Reg<"r21w", 21, [R21B,R21BH]>;
+def R22W : X86Reg<"r22w", 22, [R22B,R22BH]>;
+def R23W : X86Reg<"r23w", 23, [R23B,R23BH]>;
+def R24W : X86Reg<"r24w", 24, [R24B,R24BH]>;
+def R25W : X86Reg<"r25w", 25, [R25B,R25BH]>;
+def R26W : X86Reg<"r26w", 26, [R26B,R26BH]>;
+def R27W : X86Reg<"r27w", 27, [R27B,R27BH]>;
+def R28W : X86Reg<"r28w", 28, [R28B,R28BH]>;
+def R29W : X86Reg<"r29w", 29, [R29B,R29BH]>;
+def R30W : X86Reg<"r30w", 30, [R30B,R30BH]>;
+def R31W : X86Reg<"r31w", 31, [R31B,R31BH]>;
+}
// 32-bit registers
let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
@@ -160,6 +247,25 @@ def R14D : X86Reg<"r14d", 14, [R14W,R14WH]>;
def R15D : X86Reg<"r15d", 15, [R15W,R15WH]>;
}
+// APX only, requires REX2 or EVEX.
+let SubRegIndices = [sub_16bit, sub_16bit_hi], CoveredBySubRegs = 1 in {
+def R16D : X86Reg<"r16d", 16, [R16W,R16WH]>;
+def R17D : X86Reg<"r17d", 17, [R17W,R17WH]>;
+def R18D : X86Reg<"r18d", 18, [R18W,R18WH]>;
+def R19D : X86Reg<"r19d", 19, [R19W,R19WH]>;
+def R20D : X86Reg<"r20d", 20, [R20W,R20WH]>;
+def R21D : X86Reg<"r21d", 21, [R21W,R21WH]>;
+def R22D : X86Reg<"r22d", 22, [R22W,R22WH]>;
+def R23D : X86Reg<"r23d", 23, [R23W,R23WH]>;
+def R24D : X86Reg<"r24d", 24, [R24W,R24WH]>;
+def R25D : X86Reg<"r25d", 25, [R25W,R25WH]>;
+def R26D : X86Reg<"r26d", 26, [R26W,R26WH]>;
+def R27D : X86Reg<"r27d", 27, [R27W,R27WH]>;
+def R28D : X86Reg<"r28d", 28, [R28W,R28WH]>;
+def R29D : X86Reg<"r29d", 29, [R29W,R29WH]>;
+def R30D : X86Reg<"r30d", 30, [R30W,R30WH]>;
+def R31D : X86Reg<"r31d", 31, [R31W,R31WH]>;
+}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
def RAX : X86Reg<"rax", 0, [EAX]>, DwarfRegNum<[0, -2, -2]>;
@@ -181,6 +287,23 @@ def R13 : X86Reg<"r13", 13, [R13D]>, DwarfRegNum<[13, -2, -2]>;
def R14 : X86Reg<"r14", 14, [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : X86Reg<"r15", 15, [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : X86Reg<"rip", 0, [EIP]>, DwarfRegNum<[16, -2, -2]>;
+// APX only, requires REX2 or EVEX.
+def R16 : X86Reg<"r16", 16, [R16D]>, DwarfRegNum<[130, -2, -2]>;
+def R17 : X86Reg<"r17", 17, [R17D]>, DwarfRegNum<[131, -2, -2]>;
+def R18 : X86Reg<"r18", 18, [R18D]>, DwarfRegNum<[132, -2, -2]>;
+def R19 : X86Reg<"r19", 19, [R19D]>, DwarfRegNum<[133, -2, -2]>;
+def R20 : X86Reg<"r20", 20, [R20D]>, DwarfRegNum<[134, -2, -2]>;
+def R21 : X86Reg<"r21", 21, [R21D]>, DwarfRegNum<[135, -2, -2]>;
+def R22 : X86Reg<"r22", 22, [R22D]>, DwarfRegNum<[136, -2, -2]>;
+def R23 : X86Reg<"r23", 23, [R23D]>, DwarfRegNum<[137, -2, -2]>;
+def R24 : X86Reg<"r24", 24, [R24D]>, DwarfRegNum<[138, -2, -2]>;
+def R25 : X86Reg<"r25", 25, [R25D]>, DwarfRegNum<[139, -2, -2]>;
+def R26 : X86Reg<"r26", 26, [R26D]>, DwarfRegNum<[140, -2, -2]>;
+def R27 : X86Reg<"r27", 27, [R27D]>, DwarfRegNum<[141, -2, -2]>;
+def R28 : X86Reg<"r28", 28, [R28D]>, DwarfRegNum<[142, -2, -2]>;
+def R29 : X86Reg<"r29", 29, [R29D]>, DwarfRegNum<[143, -2, -2]>;
+def R30 : X86Reg<"r30", 30, [R30D]>, DwarfRegNum<[144, -2, -2]>;
+def R31 : X86Reg<"r31", 31, [R31D]>, DwarfRegNum<[145, -2, -2]>;
}
// MMX Registers. These are actually aliased to ST0 .. ST7
@@ -390,9 +513,11 @@ def SSP : X86Reg<"ssp", 0>;
// instruction requiring a REX prefix, while SIL, DIL, BPL, R8D, etc.
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
-def GR8 : RegisterClass<"X86", [i8], 8,
+def GR8 : RegisterClass<"X86", [i8], 8,
(add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ R8B, R9B, R10B, R11B, R16B, R17B, R18B, R19B, R20B,
+ R21B, R22B, R23B, R24B, R25B, R26B, R27B, R28B, R29B,
+ R30B, R31B, R14B, R15B, R12B, R13B)> {
let AltOrders = [(sub GR8, AH, BH, CH, DH)];
let AltOrderSelect = [{
return MF.getSubtarget<X86Subtarget>().is64Bit();
@@ -400,23 +525,30 @@ def GR8 : RegisterClass<"X86", [i8], 8,
}
let isAllocatable = 0 in
-def GRH8 : RegisterClass<"X86", [i8], 8,
+def GRH8 : RegisterClass<"X86", [i8], 8,
(add SIH, DIH, BPH, SPH, R8BH, R9BH, R10BH, R11BH,
- R12BH, R13BH, R14BH, R15BH)>;
+ R12BH, R13BH, R14BH, R15BH, R16BH, R17BH, R18BH,
+ R19BH, R20BH, R21BH, R22BH, R23BH, R24BH, R25BH,
+ R26BH, R27BH, R28BH, R29BH, R30BH, R31BH)>;
def GR16 : RegisterClass<"X86", [i16], 16,
- (add AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)>;
+ (add AX, CX, DX, SI, DI, BX, BP, SP, R8W, R9W, R10W,
+ R11W, R16W, R17W, R18W, R19W, R20W, R21W, R22W, R23W,
+ R24W, R25W, R26W, R27W, R28W, R29W, R30W, R31W, R14W,
+ R15W, R12W, R13W)>;
let isAllocatable = 0 in
def GRH16 : RegisterClass<"X86", [i16], 16,
- (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP,
- R8WH, R9WH, R10WH, R11WH, R12WH, R13WH, R14WH,
- R15WH)>;
+ (add HAX, HCX, HDX, HSI, HDI, HBX, HBP, HSP, HIP, R8WH,
+ R9WH, R10WH, R11WH, R12WH, R13WH, R14WH, R15WH, R16WH,
+ R17WH, R18WH, R19WH, R20WH, R21WH, R22WH, R23WH, R24WH,
+ R25WH, R26WH, R27WH, R28WH, R29WH, R30WH, R31WH)>;
def GR32 : RegisterClass<"X86", [i32], 32,
- (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)>;
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D,
+ R10D, R11D, R16D, R17D, R18D, R19D, R20D, R21D, R22D,
+ R23D, R24D, R25D, R26D, R27D, R28D, R29D, R30D, R31D,
+ R14D, R15D, R12D, R13D)>;
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
@@ -424,8 +556,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
- (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R16, R17,
+ R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
// GR64PLTSafe - 64-bit GPRs without R10, R11, RSP and RIP. Could be used when
// emitting code for intrinsics, which use implict input registers.
@@ -491,6 +624,27 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)>;
+// GeneratePressureSet = 0 here is a temporary workaround for many LIT
+// failures for xisa. Whether to enable it in the future still needs discussion.
+let GeneratePressureSet = 0 in {
+// GR8_NOREX2 - GR8 registers which do not require a REX2 prefix.
+def GR8_NOREX2 : RegisterClass<"X86", [i8], 8,
+ (sub GR8, (sequence "R%uB", 16, 31))> {
+ let AltOrders = [(sub GR8_NOREX2, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getSubtarget<X86Subtarget>().is64Bit();
+ }];
+}
+// GR16_NOREX2 - GR16 registers which do not require a REX2 prefix.
+def GR16_NOREX2 : RegisterClass<"X86", [i16], 16,
+ (sub GR16, (sequence "R%uW", 16, 31))>;
+// GR32_NOREX2 - GR32 registers which do not require a REX2 prefix.
+def GR32_NOREX2 : RegisterClass<"X86", [i32], 32,
+ (sub GR32, (sequence "R%uD", 16, 31))>;
+// GR64_NOREX2 - GR64 registers which do not require a REX2 prefix.
+def GR64_NOREX2 : RegisterClass<"X86", [i64], 64,
+ (sub GR64, (sequence "R%u", 16, 31))>;
+}
// GR32_NOSP - GR32 registers except ESP.
def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)>;
@@ -506,6 +660,15 @@ def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
(and GR64_NOREX, GR64_NOSP)>;
+let GeneratePressureSet = 0 in {
+// GR32_NOREX2_NOSP - GR32_NOREX2 registers except ESP.
+def GR32_NOREX2_NOSP : RegisterClass<"X86", [i32], 32,
+ (sub GR32_NOREX2, ESP)>;
+
+// GR64_NOREX2_NOSP - GR64_NOREX2 registers except RSP, RIP.
+def GR64_NOREX2_NOSP : RegisterClass<"X86", [i64], 64,
+ (sub GR64_NOREX2, RSP, RIP)>;
+}
// Register classes used for ABIs that use 32-bit address accesses,
// while using the whole x84_64 ISA.
diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
index f92d49cabdcdae3..2ac4d7cccac079b 100644
--- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
+++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
@@ -28,8 +28,8 @@ body: |
liveins: $rdi, $rsi
; CHECK-LABEL: name: test
- ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi,
- INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
+ ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4784138 /* regdef:GR64 */, def $rsi, 4784138 /* regdef:GR64 */, def dead $rdi,
+ INLINEASM &foo, 0, 4784138, def $rsi, 4784138, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
$rax = MOV64rr killed $rsi
RET64 killed $rax
...
@@ -45,8 +45,8 @@ body: |
; Verify that the register ties are preserved.
; CHECK-LABEL: name: test2
- ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
- INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
+ ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4784138 /* regdef:GR64 */, def $rsi, 4784138 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
+ INLINEASM &foo, 0, 4784138, def $rsi, 4784138, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
$rax = MOV64rr killed $rsi
RET64 killed $rax
...
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-general.ll b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll
new file mode 100644
index 000000000000000..1f92883f4170865
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-general.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+sse2,+ssse3,+egpr | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+sse2,+ssse3,+egpr,+avx | FileCheck %s --check-prefix=AVX
+
+define i32 @map0(ptr nocapture noundef readonly %a, i64 noundef %b) {
+ ; SSE-LABEL: name: map0
+ ; SSE: bb.0.entry:
+ ; SSE-NEXT: liveins: $rdi, $rsi
+ ; SSE-NEXT: {{ $}}
+ ; SSE-NEXT: [[COPY:%[0-9]+]]:gr64_nosp = COPY $rsi
+ ; SSE-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+ ; SSE-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s32) from %ir.add.ptr)
+ ; SSE-NEXT: $eax = COPY [[MOV32rm]]
+ ; SSE-NEXT: RET 0, $eax
+ ; AVX-LABEL: name: map0
+ ; AVX: bb.0.entry:
+ ; AVX-NEXT: liveins: $rdi, $rsi
+ ; AVX-NEXT: {{ $}}
+ ; AVX-NEXT: [[COPY:%[0-9]+]]:gr64_nosp = COPY $rsi
+ ; AVX-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
+ ; AVX-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s32) from %ir.add.ptr)
+ ; AVX-NEXT: $eax = COPY [[MOV32rm]]
+ ; AVX-NEXT: RET 0, $eax
+entry:
+ %add.ptr = getelementptr inbounds i32, ptr %a, i64 %b
+ %0 = load i32, ptr %add.ptr
+ ret i32 %0
+}
+
+define i32 @map1_or_vex(<2 x double> noundef %a) {
+ ; SSE-LABEL: name: map1_or_vex
+ ; SSE: bb.0.entry:
+ ; SSE-NEXT: liveins: $xmm0
+ ; SSE-NEXT: {{ $}}
+ ; SSE-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
+ ; SSE-NEXT: [[CVTSD2SIrr_Int:%[0-9]+]]:gr32 = nofpexcept CVTSD2SIrr_Int [[COPY]], implicit $mxcsr
+ ; SSE-NEXT: $eax = COPY [[CVTSD2SIrr_Int]]
+ ; SSE-NEXT: RET 0, $eax
+ ; AVX-LABEL: name: map1_or_vex
+ ; AVX: bb.0.entry:
+ ; AVX-NEXT: liveins: $xmm0
+ ; AVX-NEXT: {{ $}}
+ ; AVX-NEXT: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
+ ; AVX-NEXT: [[VCVTSD2SIrr_Int:%[0-9]+]]:gr32_norex2 = nofpexcept VCVTSD2SIrr_Int [[COPY]], implicit $mxcsr
+ ; AVX-NEXT: $eax = COPY [[VCVTSD2SIrr_Int]]
+ ; AVX-NEXT: RET 0, $eax
+entry:
+ %0 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a)
+ ret i32 %0
+}
+
+define <2 x i64> @map2_or_vex(ptr nocapture noundef readonly %b, i64 noundef %c) {
+ ; SSE-LABEL: name: map2_or_vex
+ ; SSE: bb.0.entry:
+ ; SSE-NEXT: liveins: $rdi, $rsi
+ ; SSE-NEXT: {{ $}}
+ ; SSE-NEXT: [[COPY:%[0-9]+]]:gr64_norex2_nosp = COPY $rsi
+ ; SSE-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; SSE-NEXT: [[PABSBrm:%[0-9]+]]:vr128 = PABSBrm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s128) from %ir.add.ptr)
+ ; SSE-NEXT: $xmm0 = COPY [[PABSBrm]]
+ ; SSE-NEXT: RET 0, $xmm0
+ ; AVX-LABEL: name: map2_or_vex
+ ; AVX: bb.0.entry:
+ ; AVX-NEXT: liveins: $rdi, $rsi
+ ; AVX-NEXT: {{ $}}
+ ; AVX-NEXT: [[COPY:%[0-9]+]]:gr64_norex2_nosp = COPY $rsi
+ ; AVX-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; AVX-NEXT: [[VPABSBrm:%[0-9]+]]:vr128 = VPABSBrm [[COPY1]], 4, [[COPY]], 0, $noreg :: (load (s128) from %ir.add.ptr)
+ ; AVX-NEXT: $xmm0 = COPY [[VPABSBrm]]
+ ; AVX-NEXT: RET 0, $xmm0
+entry:
+ %add.ptr = getelementptr inbounds i32, ptr %b, i64 %c
+ %a = load <2 x i64>, ptr %add.ptr
+ %0 = bitcast <2 x i64> %a to <16 x i8>
+ %elt.abs.i = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> %0, i1 false)
+ %1 = bitcast <16 x i8> %elt.abs.i to <2 x i64>
+ ret <2 x i64> %1
+}
+
+declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg)
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll
new file mode 100644
index 000000000000000..e082becbc441d9f
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-amx.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+amx-tile,+egpr | FileCheck %s
+
+define dso_local void @amx(ptr noundef %data) {
+ ; CHECK-LABEL: name: amx
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: liveins: $rdi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64_norex2_nosp = MOV32ri64 8
+ ; CHECK-NEXT: PTILELOADD 4, [[COPY]], 1, killed [[MOV32ri64_]], 0, $noreg
+ ; CHECK-NEXT: RET 0
+ entry:
+ call void @llvm.x86.tileloadd64(i8 4, ptr %data, i64 8)
+ ret void
+}
+
+declare void @llvm.x86.tileloadd64(i8 immarg, ptr, i64)
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll
new file mode 100644
index 000000000000000..10ec184516e7e71
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-pseudo-x87.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=-sse,+egpr | FileCheck %s
+
+define void @x87(ptr %0, ptr %1) {
+ ; CHECK-LABEL: name: x87
+ ; CHECK: bb.0 (%ir-block.2):
+ ; CHECK-NEXT: liveins: $rdi, $rsi
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64_norex2 = COPY $rsi
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: [[LD_Fp32m:%[0-9]+]]:rfp32 = nofpexcept LD_Fp32m [[COPY1]], 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load (s32) from %ir.0)
+ ; CHECK-NEXT: nofpexcept ST_Fp32m [[COPY]], 1, $noreg, 0, $noreg, killed [[LD_Fp32m]], implicit-def dead $fpsw, implicit $fpcw :: (store (s32) into %ir.1)
+ ; CHECK-NEXT: RET 0
+ %3 = load float, ptr %0
+ store float %3, ptr %1
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/X86/apx/no-rex2-special.ll b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll
new file mode 100644
index 000000000000000..b277949697417d7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/no-rex2-special.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -stop-after=x86-isel -mattr=+xsave,+egpr | FileCheck %s
+
+define void @test_xsave(ptr %ptr, i32 %hi, i32 %lo) {
+ ; CHECK-LABEL: name: test_xsave
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: $edx = COPY [[COPY1]]
+ ; CHECK-NEXT: $eax = COPY [[COPY]]
+ ; CHECK-NEXT: XSAVE [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+ ; CHECK-NEXT: RET 0
+ call void @llvm.x86.xsave(ptr %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave(ptr, i32, i32)
+
+define void @test_xsave64(ptr %ptr, i32 %hi, i32 %lo) {
+ ; CHECK-LABEL: name: test_xsave64
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: $edx = COPY [[COPY1]]
+ ; CHECK-NEXT: $eax = COPY [[COPY]]
+ ; CHECK-NEXT: XSAVE64 [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+ ; CHECK-NEXT: RET 0
+ call void @llvm.x86.xsave64(ptr %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xsave64(ptr, i32, i32)
+
+define void @test_xrstor(ptr %ptr, i32 %hi, i32 %lo) {
+ ; CHECK-LABEL: name: test_xrstor
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: $edx = COPY [[COPY1]]
+ ; CHECK-NEXT: $eax = COPY [[COPY]]
+ ; CHECK-NEXT: XRSTOR [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+ ; CHECK-NEXT: RET 0
+ call void @llvm.x86.xrstor(ptr %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor(ptr, i32, i32)
+
+define void @test_xrstor64(ptr %ptr, i32 %hi, i32 %lo) {
+ ; CHECK-LABEL: name: test_xrstor64
+ ; CHECK: bb.0 (%ir-block.0):
+ ; CHECK-NEXT: liveins: $rdi, $esi, $edx
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_norex2 = COPY $rdi
+ ; CHECK-NEXT: $edx = COPY [[COPY1]]
+ ; CHECK-NEXT: $eax = COPY [[COPY]]
+ ; CHECK-NEXT: XRSTOR64 [[COPY2]], 1, $noreg, 0, $noreg, implicit $edx, implicit $eax
+ ; CHECK-NEXT: RET 0
+ call void @llvm.x86.xrstor64(ptr %ptr, i32 %hi, i32 %lo)
+ ret void;
+}
+declare void @llvm.x86.xrstor64(ptr, i32, i32)
diff --git a/llvm/test/CodeGen/X86/ipra-reg-usage.ll b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
index 36c4d6eff001885..1a78b7dc0902e12 100644
--- a/llvm/test/CodeGen/X86/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/X86/ipra-reg-usage.ll
@@ -3,7 +3,7 @@
target triple = "x86_64-unknown-unknown"
declare void @bar1()
define preserve_allcc void @foo()#0 {
-; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r11bh $r11d $r11w $r11wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
+; CHECK: foo Clobbered Registers: $cs $df $ds $eflags $eip $eiz $es $esp $fpcw $fpsw $fs $fs_base $gs $gs_base $hip $hsp $ip $mxcsr $rflags $rip $riz $rsp $sp $sph $spl $ss $ssp $tmmcfg $_eflags $cr0 $cr1 $cr2 $cr3 $cr4 $cr5 $cr6 $cr7 $cr8 $cr9 $cr10 $cr11 $cr12 $cr13 $cr14 $cr15 $dr0 $dr1 $dr2 $dr3 $dr4 $dr5 $dr6 $dr7 $dr8 $dr9 $dr10 $dr11 $dr12 $dr13 $dr14 $dr15 $fp0 $fp1 $fp2 $fp3 $fp4 $fp5 $fp6 $fp7 $k0 $k1 $k2 $k3 $k4 $k5 $k6 $k7 $mm0 $mm1 $mm2 $mm3 $mm4 $mm5 $mm6 $mm7 $r11 $r16 $r17 $r18 $r19 $r20 $r21 $r22 $r23 $r24 $r25 $r26 $r27 $r28 $r29 $r30 $r31 $st0 $st1 $st2 $st3 $st4 $st5 $st6 $st7 $tmm0 $tmm1 $tmm2 $tmm3 $tmm4 $tmm5 $tmm6 $tmm7 $xmm16 $xmm17 $xmm18 $xmm19 $xmm20 $xmm21 $xmm22 $xmm23 $xmm24 $xmm25 $xmm26 $xmm27 $xmm28 $xmm29 $xmm30 $xmm31 $ymm0 $ymm1 $ymm2 $ymm3 $ymm4 $ymm5 $ymm6 $ymm7 $ymm8 $ymm9 $ymm10 $ymm11 $ymm12 $ymm13 $ymm14 $ymm15 $ymm16 $ymm17 $ymm18 $ymm19 $ymm20 $ymm21 $ymm22 $ymm23 $ymm24 $ymm25 $ymm26 $ymm27 $ymm28 $ymm29 $ymm30 $ymm31 $zmm0 $zmm1 $zmm2 $zmm3 $zmm4 $zmm5 $zmm6 $zmm7 $zmm8 $zmm9 $zmm10 $zmm11 $zmm12 $zmm13 $zmm14 $zmm15 $zmm16 $zmm17 $zmm18 $zmm19 $zmm20 $zmm21 $zmm22 $zmm23 $zmm24 $zmm25 $zmm26 $zmm27 $zmm28 $zmm29 $zmm30 $zmm31 $r11b $r16b $r17b $r18b $r19b $r20b $r21b $r22b $r23b $r24b $r25b $r26b $r27b $r28b $r29b $r30b $r31b $r11bh $r16bh $r17bh $r18bh $r19bh $r20bh $r21bh $r22bh $r23bh $r24bh $r25bh $r26bh $r27bh $r28bh $r29bh $r30bh $r31bh $r11d $r16d $r17d $r18d $r19d $r20d $r21d $r22d $r23d $r24d $r25d $r26d $r27d $r28d $r29d $r30d $r31d $r11w $r16w $r17w $r18w $r19w $r20w $r21w $r22w $r23w $r24w $r25w $r26w $r27w $r28w $r29w $r30w $r31w $r11wh $r16wh $r17wh $r18wh $r19wh $r20wh $r21wh $r22wh $r23wh $r24wh $r25wh $r26wh $r27wh $r28wh $r29wh $r30wh $r31wh $k0_k1 $k2_k3 $k4_k5 $k6_k7
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
index ce672a70b1f9126..d3ded0b2a03d871 100644
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -37,6 +37,7 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-NEXT: .cfi_offset %r14, -32
; LINUX-NEXT: .cfi_offset %r15, -24
; LINUX-NEXT: .cfi_offset %rbp, -16
+; LINUX-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill
; LINUX-NEXT: movaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUX-NEXT: movaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUX-NEXT: movaps %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
@@ -45,12 +46,11 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUX-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUX-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
-; LINUX-NEXT: movl %eax, %ebp
-; LINUX-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; LINUX-NEXT: movq %r8, %r14
-; LINUX-NEXT: movq %rcx, %r15
-; LINUX-NEXT: movq %rdx, %r12
-; LINUX-NEXT: movq %rsi, %r13
+; LINUX-NEXT: movq %r9, %r14
+; LINUX-NEXT: movq %r8, %r15
+; LINUX-NEXT: movq %rcx, %r12
+; LINUX-NEXT: movq %rdx, %r13
+; LINUX-NEXT: movq %rsi, %rbp
; LINUX-NEXT: movq %rdi, %rbx
; LINUX-NEXT: movq %rsi, {{[0-9]+}}(%rsp)
; LINUX-NEXT: movq %rdx, {{[0-9]+}}(%rsp)
@@ -78,12 +78,12 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-NEXT: callq get_f at PLT
; LINUX-NEXT: movq %rax, %r11
; LINUX-NEXT: movq %rbx, %rdi
-; LINUX-NEXT: movq %r13, %rsi
-; LINUX-NEXT: movq %r12, %rdx
-; LINUX-NEXT: movq %r15, %rcx
-; LINUX-NEXT: movq %r14, %r8
-; LINUX-NEXT: movl %ebp, %eax
-; LINUX-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
+; LINUX-NEXT: movq %rbp, %rsi
+; LINUX-NEXT: movq %r13, %rdx
+; LINUX-NEXT: movq %r12, %rcx
+; LINUX-NEXT: movq %r15, %r8
+; LINUX-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-NEXT: movq %r14, %r9
; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; LINUX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
@@ -130,6 +130,7 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-X32-NEXT: .cfi_offset %r14, -32
; LINUX-X32-NEXT: .cfi_offset %r15, -24
; LINUX-X32-NEXT: .cfi_offset %rbp, -16
+; LINUX-X32-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; LINUX-X32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; LINUX-X32-NEXT: movaps %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; LINUX-X32-NEXT: movaps %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
@@ -138,12 +139,11 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-X32-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; LINUX-X32-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; LINUX-X32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
-; LINUX-X32-NEXT: movl %eax, %ebp
-; LINUX-X32-NEXT: movq %r9, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill
-; LINUX-X32-NEXT: movq %r8, %r14
-; LINUX-X32-NEXT: movq %rcx, %r15
-; LINUX-X32-NEXT: movq %rdx, %r12
-; LINUX-X32-NEXT: movq %rsi, %r13
+; LINUX-X32-NEXT: movq %r9, %r14
+; LINUX-X32-NEXT: movq %r8, %r15
+; LINUX-X32-NEXT: movq %rcx, %r12
+; LINUX-X32-NEXT: movq %rdx, %r13
+; LINUX-X32-NEXT: movq %rsi, %rbp
; LINUX-X32-NEXT: movq %rdi, %rbx
; LINUX-X32-NEXT: movq %rsi, {{[0-9]+}}(%esp)
; LINUX-X32-NEXT: movq %rdx, {{[0-9]+}}(%esp)
@@ -171,12 +171,12 @@ define void @f_thunk(ptr %this, ...) {
; LINUX-X32-NEXT: callq get_f at PLT
; LINUX-X32-NEXT: movl %eax, %r11d
; LINUX-X32-NEXT: movq %rbx, %rdi
-; LINUX-X32-NEXT: movq %r13, %rsi
-; LINUX-X32-NEXT: movq %r12, %rdx
-; LINUX-X32-NEXT: movq %r15, %rcx
-; LINUX-X32-NEXT: movq %r14, %r8
-; LINUX-X32-NEXT: movl %ebp, %eax
-; LINUX-X32-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r9 # 8-byte Reload
+; LINUX-X32-NEXT: movq %rbp, %rsi
+; LINUX-X32-NEXT: movq %r13, %rdx
+; LINUX-X32-NEXT: movq %r12, %rcx
+; LINUX-X32-NEXT: movq %r15, %r8
+; LINUX-X32-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; LINUX-X32-NEXT: movq %r14, %r9
; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload
; LINUX-X32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload
diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
index 4efddd57cf5b0b1..e2e963d93d7a4c0 100644
--- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
+++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir
@@ -351,7 +351,7 @@ body: |
; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags
; CHECK-NEXT: undef [[MOV32ri2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32ri 0
; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], [[MOV32ri2]], 4, implicit killed $eflags
- ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, [[MOV32ri2]], 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+ ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4784137 /* reguse:GR64 */, [[MOV32ri2]], 4784137 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
; CHECK-NEXT: $rdi = COPY [[COPY4]]
@@ -471,7 +471,7 @@ body: |
%63:gr64 = NOT64r %63
CMP64rr %63, %31, implicit-def $eflags
%63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags
- INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
+ INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4784137 /* reguse:GR64 */, %53, 4784137 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags
LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1)
ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
$rdi = COPY %64
diff --git a/llvm/test/MC/AsmParser/seh-directive-errors.s b/llvm/test/MC/AsmParser/seh-directive-errors.s
index 8f4cc211d1c0296..6d1486916caf3f7 100644
--- a/llvm/test/MC/AsmParser/seh-directive-errors.s
+++ b/llvm/test/MC/AsmParser/seh-directive-errors.s
@@ -105,7 +105,7 @@ h: # @h
i:
.seh_proc i
pushq %rbp
- .seh_pushreg 17
+ .seh_pushreg 32
# CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: incorrect register number for use with this directive
pushq %rbx
.seh_pushreg %xmm0
diff --git a/llvm/test/MC/X86/apx/cfi-reg.s b/llvm/test/MC/X86/apx/cfi-reg.s
new file mode 100644
index 000000000000000..ee1e76bd20007f1
--- /dev/null
+++ b/llvm/test/MC/X86/apx/cfi-reg.s
@@ -0,0 +1,41 @@
+// RUN: llvm-mc -triple x86_64-pc-linux-gnu %s -o - | FileCheck %s
+
+f:
+ .cfi_startproc
+ .cfi_offset 130, -120
+ .cfi_offset 131, -112
+ .cfi_offset 132, -104
+ .cfi_offset 133, -96
+ .cfi_offset 134, -88
+ .cfi_offset 135, -80
+ .cfi_offset 136, -72
+ .cfi_offset 137, -64
+ .cfi_offset 138, -56
+ .cfi_offset 139, -48
+ .cfi_offset 140, -40
+ .cfi_offset 141, -32
+ .cfi_offset 142, -24
+ .cfi_offset 143, -16
+ .cfi_offset 144, -8
+ .cfi_offset 145, 0
+ .cfi_endproc
+
+// CHECK: f:
+// CHECK-NEXT: .cfi_startproc
+// CHECK-NEXT: .cfi_offset %r16, -120
+// CHECK-NEXT: .cfi_offset %r17, -112
+// CHECK-NEXT: .cfi_offset %r18, -104
+// CHECK-NEXT: .cfi_offset %r19, -96
+// CHECK-NEXT: .cfi_offset %r20, -88
+// CHECK-NEXT: .cfi_offset %r21, -80
+// CHECK-NEXT: .cfi_offset %r22, -72
+// CHECK-NEXT: .cfi_offset %r23, -64
+// CHECK-NEXT: .cfi_offset %r24, -56
+// CHECK-NEXT: .cfi_offset %r25, -48
+// CHECK-NEXT: .cfi_offset %r26, -40
+// CHECK-NEXT: .cfi_offset %r27, -32
+// CHECK-NEXT: .cfi_offset %r28, -24
+// CHECK-NEXT: .cfi_offset %r29, -16
+// CHECK-NEXT: .cfi_offset %r30, -8
+// CHECK-NEXT: .cfi_offset %r31, 0
+// CHECK-NEXT: .cfi_endproc